Fossil

Check-in [301700a224]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:New --compress option to "rebuild" causes more agressive delta compression which can result in a 30% or better size reduction in the database file, with corresponding speedup of cloning.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 301700a224211fac31f9a58d7e1ebdec32e95661
User & Date: drh 2011-03-01 13:20:54.192
Context
2011-03-01
17:45
Fix the content_deltify() routine to always return an integer value. This fixes an error in the previous check-in (the --compress checkin). Also fix a help comment on the test-3-way-merge command. ... (check-in: 60c2feea08 user: drh tags: trunk)
13:20
New --compress option to "rebuild" causes more agressive delta compression which can result in a 30% or better size reduction in the database file, with corresponding speedup of cloning. ... (check-in: 301700a224 user: drh tags: trunk)
02:11
Remove a stray NIL character from the makemake.tcl file and add a "+" that was omitted from the previous edit. ... (check-in: a668e2ad73 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/content.c.
744
745
746
747
748
749
750
751
752


753
754
755
756
757


758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793

794
795
796
797

798
799
800
801
802
803
804
** the source of the delta.  It is OK to delta private->private and
** public->private and public->public.  Just no private->public delta.
**
** If srcid is a delta that depends on rid, then srcid is
** converted to undeltaed text.
**
** If either rid or srcid contain less than 50 bytes, or if the
** resulting delta does not achieve a compression of at least 25% on
** its own the rid is left untouched.


*/
void content_deltify(int rid, int srcid, int force){
  int s;
  Blob data, src, delta;
  Stmt s1, s2;


  if( srcid==rid ) return;
  if( !force && findSrcid(rid)>0 ) return;
  if( content_is_private(srcid) && !content_is_private(rid) ){
    return;
  }
  s = srcid;
  while( (s = findSrcid(s))>0 ){
    if( s==rid ){
      content_undelta(srcid);
      break;
    }
  }
  content_get(srcid, &src);
  if( blob_size(&src)<50 ){
    blob_reset(&src);
    return;
  }
  content_get(rid, &data);
  if( blob_size(&data)<50 ){
    blob_reset(&src);
    blob_reset(&data);
    return;
  }
  blob_delta_create(&src, &data, &delta);
  if( blob_size(&delta) < blob_size(&data)*0.75 ){
    blob_compress(&delta, &delta);
    db_prepare(&s1, "UPDATE blob SET content=:data WHERE rid=%d", rid);
    db_prepare(&s2, "REPLACE INTO delta(rid,srcid)VALUES(%d,%d)", rid, srcid);
    db_bind_blob(&s1, ":data", &delta);
    db_begin_transaction();
    db_exec(&s1);
    db_exec(&s2);
    db_end_transaction(0);
    db_finalize(&s1);
    db_finalize(&s2);
    verify_before_commit(rid);

  }
  blob_reset(&src);
  blob_reset(&data);
  blob_reset(&delta);

}

/*
** COMMAND:  test-content-deltify
**
** Convert the content at RID into a delta from SRCID.
*/







|
|
>
>

|



>
>



|











|





|


|











>




>







744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
** the source of the delta.  It is OK to delta private->private and
** public->private and public->public.  Just no private->public delta.
**
** If srcid is a delta that depends on rid, then srcid is
** converted to undeltaed text.
**
** If either rid or srcid contain less than 50 bytes, or if the
** resulting delta does not achieve a compression of at least 25% 
** the rid is left untouched.
**
** Return 1 if a delta is made and 0 if no delta occurs.
*/
int content_deltify(int rid, int srcid, int force){
  int s;
  Blob data, src, delta;
  Stmt s1, s2;
  int rc = 0;

  if( srcid==rid ) return;
  if( !force && findSrcid(rid)>0 ) return;
  if( content_is_private(srcid) && !content_is_private(rid) ){
    return 0;
  }
  s = srcid;
  while( (s = findSrcid(s))>0 ){
    if( s==rid ){
      content_undelta(srcid);
      break;
    }
  }
  content_get(srcid, &src);
  if( blob_size(&src)<50 ){
    blob_reset(&src);
    return 0;
  }
  content_get(rid, &data);
  if( blob_size(&data)<50 ){
    blob_reset(&src);
    blob_reset(&data);
    return 0;
  }
  blob_delta_create(&src, &data, &delta);
  if( blob_size(&delta) <= blob_size(&data)*0.75 ){
    blob_compress(&delta, &delta);
    db_prepare(&s1, "UPDATE blob SET content=:data WHERE rid=%d", rid);
    db_prepare(&s2, "REPLACE INTO delta(rid,srcid)VALUES(%d,%d)", rid, srcid);
    db_bind_blob(&s1, ":data", &delta);
    db_begin_transaction();
    db_exec(&s1);
    db_exec(&s2);
    db_end_transaction(0);
    db_finalize(&s1);
    db_finalize(&s2);
    verify_before_commit(rid);
    rc = 1;
  }
  blob_reset(&src);
  blob_reset(&data);
  blob_reset(&delta);
  return rc;
}

/*
** COMMAND:  test-content-deltify
**
** Convert the content at RID into a delta from SRCID.
*/
Changes to src/manifest.c.
1251
1252
1253
1254
1255
1256
1257
1258

1259
1260
1261
1262
1263
1264
1265
  }
  if( fetch_baseline(pParent, 0) || fetch_baseline(pChild, 0) ){
    manifest_destroy(*ppOther);
    return;
  }

  /* Try to make the parent manifest a delta from the child, if that
  ** is an appropriate thing to do.

  */
  if( (pParent->zBaseline==0)==(pChild->zBaseline==0) ){
    content_deltify(pid, cid, 0); 
  }else if( pChild->zBaseline==0 && pParent->zBaseline!=0 ){
    content_deltify(pParent->pBaseline->rid, cid, 0);
  }








|
>







1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
  }
  if( fetch_baseline(pParent, 0) || fetch_baseline(pChild, 0) ){
    manifest_destroy(*ppOther);
    return;
  }

  /* Try to make the parent manifest a delta from the child, if that
  ** is an appropriate thing to do.  For a new baseline, make the 
  ** previoius baseline a delta from the current baseline.
  */
  if( (pParent->zBaseline==0)==(pChild->zBaseline==0) ){
    content_deltify(pid, cid, 0); 
  }else if( pChild->zBaseline==0 && pParent->zBaseline!=0 ){
    content_deltify(pParent->pBaseline->rid, cid, 0);
  }

Changes to src/rebuild.c.
337
338
339
340
341
342
343

























































344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

362
363
364
365
366
367
368
369
370
371
372
373

374
375
376
377
378
379

380
381
382
383
384
385
386
    percent_complete((processCnt*1000)/totalSize);
  }
  if(!g.fQuiet && ttyOutput ){
    printf("\n");
  }
  return errCnt;
}


























































/*
** COMMAND:  rebuild
**
** Usage: %fossil rebuild ?REPOSITORY?
**
** Reconstruct the named repository database from the core
** records.  Run this command after updating the fossil
** executable in a way that changes the database schema.
**
** Options:
**
**   --noverify    Skip the verification of changes to the BLOB table
**   --force       Force the rebuild to complete even if errors are seen
**   --randomize   Scan artifacts in a random order
**   --cluster     Compute clusters for unclustered artifacts
**   --pagesize N  Set the database pagesize to N. (512..65536 and power of 2)
**   --wal         Set Write-Ahead-Log journalling mode on the database

**   --vacuum      Run VACUUM on the database after rebuilding
*/
void rebuild_database(void){
  int forceFlag;
  int randomizeFlag;
  int errCnt;
  int omitVerify;
  int doClustering;
  const char *zPagesize;
  int newPagesize = 0;
  int activateWal;
  int runVacuum;


  omitVerify = find_option("noverify",0,0)!=0;
  forceFlag = find_option("force","f",0)!=0;
  randomizeFlag = find_option("randomize", 0, 0)!=0;
  doClustering = find_option("cluster", 0, 0)!=0;
  runVacuum = find_option("vacuum",0,0)!=0;

  zPagesize = find_option("pagesize",0,1);
  if( zPagesize ){
    newPagesize = atoi(zPagesize);
    if( newPagesize<512 || newPagesize>65536
        || (newPagesize&(newPagesize-1))!=0
    ){
      fossil_fatal("page size must be a power of two between 512 and 65536");







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


















>












>






>







337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
    percent_complete((processCnt*1000)/totalSize);
  }
  if(!g.fQuiet && ttyOutput ){
    printf("\n");
  }
  return errCnt;
}

/*
** Attempt to convert more full-text blobs into delta-blobs for
** storage efficiency.
*/
static void extra_deltification(void){
  Stmt q;
  int topid, previd, rid;
  int prevfnid, fnid;
  db_begin_transaction();
  db_prepare(&q,
     "SELECT rid FROM event, blob"
     " WHERE blob.rid=event.objid"
     "   AND event.type='ci'"
     "   AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)"
     " ORDER BY event.mtime DESC"
  );
  topid = previd = 0;
  while( db_step(&q)==SQLITE_ROW ){
    rid = db_column_int(&q, 0);
    if( topid==0 ){
      topid = previd = rid;
    }else{
      if( content_deltify(rid, previd, 0)==0 && previd!=topid ){
        content_deltify(rid, topid, 0);
      }
      previd = rid;
    }
  }
  db_finalize(&q);

  db_prepare(&q,
     "SELECT blob.rid, mlink.fnid FROM blob, mlink, plink"
     " WHERE NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)"
     "   AND mlink.fid=blob.rid"
     "   AND mlink.mid=plink.cid"
     "   AND plink.cid=mlink.mid"
     " ORDER BY mlink.fnid, plink.mtime DESC"
  );
  prevfnid = 0;
  while( db_step(&q)==SQLITE_ROW ){
    rid = db_column_int(&q, 0);
    fnid = db_column_int(&q, 1);
    if( prevfnid!=fnid ){
      prevfnid = fnid;
      topid = previd = rid;
    }else{
      if( content_deltify(rid, previd, 0)==0 && previd!=topid ){
        content_deltify(rid, topid, 0);
      }
      previd = rid;
    }
  }
  db_finalize(&q);

  db_end_transaction(0);
}

/*
** COMMAND:  rebuild
**
** Usage: %fossil rebuild ?REPOSITORY?
**
** Reconstruct the named repository database from the core
** records.  Run this command after updating the fossil
** executable in a way that changes the database schema.
**
** Options:
**
**   --noverify    Skip the verification of changes to the BLOB table
**   --force       Force the rebuild to complete even if errors are seen
**   --randomize   Scan artifacts in a random order
**   --cluster     Compute clusters for unclustered artifacts
**   --pagesize N  Set the database pagesize to N. (512..65536 and power of 2)
**   --wal         Set Write-Ahead-Log journalling mode on the database
**   --compress    Strive to make the database as small as possible
**   --vacuum      Run VACUUM on the database after rebuilding
*/
void rebuild_database(void){
  int forceFlag;
  int randomizeFlag;
  int errCnt;
  int omitVerify;
  int doClustering;
  const char *zPagesize;
  int newPagesize = 0;
  int activateWal;
  int runVacuum;
  int runCompress;

  omitVerify = find_option("noverify",0,0)!=0;
  forceFlag = find_option("force","f",0)!=0;
  randomizeFlag = find_option("randomize", 0, 0)!=0;
  doClustering = find_option("cluster", 0, 0)!=0;
  runVacuum = find_option("vacuum",0,0)!=0;
  runCompress = find_option("compress",0,0)!=0;
  zPagesize = find_option("pagesize",0,1);
  if( zPagesize ){
    newPagesize = atoi(zPagesize);
    if( newPagesize<512 || newPagesize>65536
        || (newPagesize&(newPagesize-1))!=0
    ){
      fossil_fatal("page size must be a power of two between 512 and 65536");
406
407
408
409
410
411
412





413
414

415
416
417
418
419
420
421
    CONTENT_SCHEMA, AUX_SCHEMA
  );
  if( errCnt && !forceFlag ){
    printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
            errCnt);
    db_end_transaction(1);
  }else{





    if( omitVerify ) verify_cancel();
    db_end_transaction(0);

    db_close(0);
    db_open_repository(g.zRepositoryName);
    if( newPagesize ){
      db_multi_exec("PRAGMA page_size=%d", newPagesize);
      runVacuum = 1;
    }
    if( runVacuum ){







>
>
>
>
>


>







466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
    CONTENT_SCHEMA, AUX_SCHEMA
  );
  if( errCnt && !forceFlag ){
    printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
            errCnt);
    db_end_transaction(1);
  }else{
    if( runCompress ){
      printf("Extra delta compression... "); fflush(stdout);
      extra_deltification();
      runVacuum = 1;
    }
    if( omitVerify ) verify_cancel();
    db_end_transaction(0);
    if( runCompress ) printf("done\n");
    db_close(0);
    db_open_repository(g.zRepositoryName);
    if( newPagesize ){
      db_multi_exec("PRAGMA page_size=%d", newPagesize);
      runVacuum = 1;
    }
    if( runVacuum ){