Fossil

Check-in [5f0201030c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Dramatic performance improvement for "fossil deconstruct" and "fossil reconstruct" on large repositories. Add progress information for "fossil reconstruct". Possibly related to ticket [2a1e8e3c4b0b39e08fdde0]. Fix for ticket [76d3ecfdab577bdf843].
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 5f0201030cbf365207d67c125b433bb17e2574fc
User & Date: drh 2010-10-03 20:00:14.000
Original Comment: Dramatic performance improvement for "fossil deconstruct" and "fossil reconstruct" on large repositories. Add progress information for "fossil reconstruct". Possibly related to ticket [2a1e8e3c4b0b39e08fdde0]
References
2010-10-03
23:53 Fixed ticket [76d3ecfdab]: verify_at_commit should print status plus 2 other changes ... (artifact: aad33bd92a user: drh)
Context
2010-10-03
23:31
Make the R card of manifests truly optional. It is always generated on manifests created by Fossil itself, but 3rd party import tools might choose to omit the R card as a simplification. Ticket [a32ff1eddb6ac1f499]. ... (check-in: aab38ef02f user: drh tags: trunk)
20:34
merge from trunk ... (check-in: 287dd50e7b user: wolfgang tags: wolfgangFormat2CSS_2)
20:00
Dramatic performance improvement for "fossil deconstruct" and "fossil reconstruct" on large repositories. Add progress information for "fossil reconstruct". Possibly related to ticket [2a1e8e3c4b0b39e08fdde0]. Fix for ticket [76d3ecfdab577bdf843]. ... (check-in: 5f0201030c user: drh tags: trunk)
19:01
For "fossil rebuild" increment the progress counter after each artifact is processed, rather than waiting for its delta children to be processed, in order to give a more uniform progress indication. Possibly related to ticket [2a1e8e3c4b0b39e08fdde]. ... (check-in: ae000c23fa user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/rebuild.c.
70
71
72
73
74
75
76
77

78
79
80
81
82



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102











103
104
105
106
107
108
109
@ CREATE TABLE IF NOT EXISTS concealed(
@   hash TEXT PRIMARY KEY,
@   content TEXT
@ );
;

/*
** Variables used for progress information

*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */
static Bag bagDone;         /* Bag of records rebuilt */




/*
** Called after each artifact is processed.
**
** Record rid in bagDone.  When progress output is enabled (ttyOutput)
** bump processCnt and, unless g.fQuiet is set, print the running count
** and the percentage of totalSize on a line terminated by a carriage
** return so that the next report overwrites it.
*/
static void rebuild_step_done(int rid){
  /* assert( bag_find(&bagDone, rid)==0 ); */
  bag_insert(&bagDone, rid);
  if( ttyOutput ){
    processCnt++;
    /* totalSize is the divisor below: skip the report rather than
    ** divide by zero if it has not been set to a positive value. */
    if( !g.fQuiet && totalSize>0 ){
      printf("%d (%d%%)...\r", processCnt, (processCnt*100/totalSize));
      fflush(stdout);
    }
  }
}

/*
** Rebuild cross-referencing information for the artifact
** rid with content pBase and all of its descendants.  This
** routine clears the content buffer before returning.











*/
static void rebuild_step(int rid, int size, Blob *pBase){
  static Stmt q1;
  Bag children;
  Blob copy;
  Blob *pUse;
  int nChild, i, cid;







|
>





>
>
>




















>
>
>
>
>
>
>
>
>
>
>







70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
@ CREATE TABLE IF NOT EXISTS concealed(
@   hash TEXT PRIMARY KEY,
@   content TEXT
@ );
;

/*
** Variables used to store state information about an on-going "rebuild"
** or "deconstruct".
*/
static int totalSize;       /* Total number of artifacts to process */
static int processCnt;      /* Number processed so far */
static int ttyOutput;       /* Do progress output */
static Bag bagDone;         /* Bag of records rebuilt */

static char *zFNameFormat;  /* Format string for filenames on deconstruct */
static int prefixLength;    /* Length of directory prefix for deconstruct */

/*
** Called after each artifact is processed.
**
** Record rid in bagDone.  When progress output is enabled (ttyOutput)
** bump processCnt and, unless g.fQuiet is set, print the running count
** and the percentage of totalSize on a line terminated by a carriage
** return so that the next report overwrites it.
*/
static void rebuild_step_done(int rid){
  /* assert( bag_find(&bagDone, rid)==0 ); */
  bag_insert(&bagDone, rid);
  if( ttyOutput ){
    processCnt++;
    /* totalSize is the divisor below: skip the report rather than
    ** divide by zero if it has not been set to a positive value. */
    if( !g.fQuiet && totalSize>0 ){
      printf("%d (%d%%)...\r", processCnt, (processCnt*100/totalSize));
      fflush(stdout);
    }
  }
}

/*
** Rebuild cross-referencing information for the artifact
** rid with content pBase and all of its descendants.  This
** routine clears the content buffer before returning.
**
** If the zFNameFormat variable is set, then this routine is
** called to run "fossil deconstruct" instead of the usual
** "fossil rebuild".  In that case, instead of rebuilding the
** cross-referencing information, write the file content out
** to the appropriate directory.
**
** In both cases, this routine automatically recurses to process
** other artifacts that are deltas off of the current artifact.
** This is the most efficient way to extract all of the original
** artifact content from the Fossil repository.
*/
static void rebuild_step(int rid, int size, Blob *pBase){
  static Stmt q1;
  Bag children;
  Blob copy;
  Blob *pUse;
  int nChild, i, cid;
131
132
133
134
135
136
137


138








139
140
141
142
143
144
145
  /* Crosslink the artifact */
  if( nChild==0 ){
    pUse = pBase;
  }else{
    blob_copy(&copy, pBase);
    pUse = &copy;
  }


  manifest_crosslink(rid, pUse);








  blob_reset(pUse);
  rebuild_step_done(rid);

  /* Call all children recursively */
  for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){
    Stmt q2;
    int sz;







>
>
|
>
>
>
>
>
>
>
>







146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
  /* Crosslink the artifact */
  if( nChild==0 ){
    pUse = pBase;
  }else{
    blob_copy(&copy, pBase);
    pUse = &copy;
  }
  if( zFNameFormat==0 ){
    /* We are doing "fossil rebuild" */
    manifest_crosslink(rid, pUse);
  }else{
    /* We are doing "fossil deconstruct" */
    char *zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
    char *zFile = mprintf(zFNameFormat, zUuid, zUuid+prefixLength);
    blob_write_to_file(pUse,zFile);
    free(zFile);
    free(zUuid);
  }
  blob_reset(pUse);
  rebuild_step_done(rid);

  /* Call all children recursively */
  for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){
    Stmt q2;
    int sz;
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414

415
416
417
418
419
420
421
    db_multi_exec("VACUUM;");
  }else{
    rebuild_db(0, 1);
    db_end_transaction(0);
  }
}

/* 
** Helper function for "reconstruct" that reads a directory
** recursively.
*/
void recon_read_dir(char * zPath){
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */


  d = opendir(zPath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;








|
|
|

|



>







425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
    db_multi_exec("VACUUM;");
  }else{
    rebuild_db(0, 1);
    db_end_transaction(0);
  }
}

/*
** Recursively read all files from the directory zPath and install
** every file read as a new artifact in the repository.
*/
void recon_read_dir(char *zPath){
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;

  d = opendir(zPath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;

432
433
434
435
436
437
438


439
440
441
442
443
444
445
        fossil_panic("some unknown error occurred while reading \"%s\"", 
                     blob_str(&path));
      }
      content_put(&aContent, 0, 0);
      blob_reset(&path);
      blob_reset(&aContent);
      free(zSubpath);


    }
  }else {
    fossil_panic("encountered error %d while trying to open \"%s\".",
                  errno, g.argv[3]);
  }
}








>
>







458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
        fossil_panic("some unknown error occurred while reading \"%s\"", 
                     blob_str(&path));
      }
      content_put(&aContent, 0, 0);
      blob_reset(&path);
      blob_reset(&aContent);
      free(zSubpath);
      printf("\r%d", ++nFileRead);
      fflush(stdout);
    }
  }else {
    fossil_panic("encountered error %d while trying to open \"%s\".",
                  errno, g.argv[3]);
  }
}

465
466
467
468
469
470
471

472

473
474
475






476
477
478
479
480
481
482
  }
  db_create_repository(g.argv[2]);
  db_open_repository(g.argv[2]);
  db_open_config(0);
  db_begin_transaction();
  db_initial_setup(0, 0, 1);


  recon_read_dir(g.argv[3]);


  rebuild_db(0, 1);







  db_end_transaction(0);
  printf("project-id: %s\n", db_get("project-code", 0));
  printf("server-id: %s\n", db_get("server-code", 0));
  zPassword = db_text(0, "SELECT pw FROM user WHERE login=%Q", g.zLogin);
  printf("admin-user: %s (initial password is \"%s\")\n", g.zLogin, zPassword);
}








>

>



>
>
>
>
>
>







493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
  }
  db_create_repository(g.argv[2]);
  db_open_repository(g.argv[2]);
  db_open_config(0);
  db_begin_transaction();
  db_initial_setup(0, 0, 1);

  printf("Reading files from directory \"%s\"...\n", g.argv[3]);
  recon_read_dir(g.argv[3]);
  printf("\nBuilding the Fossil repository...\n");

  rebuild_db(0, 1);

  /* Skip the verify_before_commit() step on a reconstruct.  Most artifacts
  ** will have been changed and verification therefore takes a really, really
  ** long time.
  */
  verify_cancel();
  
  db_end_transaction(0);
  printf("project-id: %s\n", db_get("project-code", 0));
  printf("server-id: %s\n", db_get("server-code", 0));
  zPassword = db_text(0, "SELECT pw FROM user WHERE login=%Q", g.zLogin);
  printf("admin-user: %s (initial password is \"%s\")\n", g.zLogin, zPassword);
}

491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535





536
537























538
539


540
541



542
543
544

545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563

564
565
** AABBBBBBBBB.. is the 40 character artifact ID, AA the first 2 characters.
** If -L|--prefixlength is given, the length (default 2) of the directory
** prefix can be set to 0,1,..,9 characters.
*/
void deconstruct_cmd(void){
  const char *zDestDir;          /* Destination directory (last argument) */
  const char *zPrefixOpt;        /* Value of -L|--prefixlength, or NULL */
  int         prefixLength = 0;  /* Leading UUID chars used as subdirectory */
  char       *zAFileOutFormat;   /* Format taking DESTINATION, UUID, UUID tail */
  Stmt        q;                 /* Walks every row of the blob table */

  /* check number of arguments */
  if( (g.argc != 3) && (g.argc != 5)  && (g.argc != 7)){
    usage ("?-R|--repository REPOSITORY? ?-L|--prefixlength N? DESTINATION");
  }
  /* get and check argument destination directory */
  zDestDir = g.argv[g.argc-1];
  if( !*zDestDir  || !file_isdir(zDestDir)) {
    fossil_panic("DESTINATION(%s) is not a directory!",zDestDir);
  }
  /* get and check prefix length argument and build format string;
  ** only a single decimal digit is accepted, the default is 2 */
  zPrefixOpt=find_option("prefixlength","L",1);
  if( !zPrefixOpt ){
    prefixLength = 2;
  }else{
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      prefixLength = (int)(*zPrefixOpt-'0');
    }else{
      fossil_panic("N(%s) is not a valid prefix length!",zPrefixOpt);
    }
  }
  if( prefixLength ){
    /* DESTINATION / first prefixLength chars of UUID / rest of UUID */
    zAFileOutFormat = mprintf("%%s/%%.%ds/%%s",prefixLength);
  }else{
    /* No subdirectory: DESTINATION / full UUID */
    zAFileOutFormat = mprintf("%%s/%%s");
  }
#ifndef _WIN32
  if( access(zDestDir, W_OK) ){
    fossil_panic("DESTINATION(%s) is not writeable!",zDestDir);
  }
#else
  /* write access on windows is not checked, errors will be
  ** detected on blob_write_to_file
  */
#endif

  /* open repository and open query for all artifacts */
  db_find_and_open_repository(1);
  db_prepare(&q, "SELECT rid,uuid FROM blob");
  /* loop over artifacts and write them to single files */
  while( db_step(&q)==SQLITE_ROW ){
    int         aRid;
    const char *zAUuid;
    char       *zAFName;
    Blob        zACont;

    /* get data from query */
    aRid   = db_column_int (&q, 0);
    zAUuid = db_column_text(&q, 1);

    /* construct output filename */
    zAFName = mprintf(zAFileOutFormat, zDestDir, zAUuid, zAUuid + prefixLength);

    /* read artifact contents from db and write to file */
    content_get(aRid,&zACont);
    blob_write_to_file(&zACont,zAFName);
    blob_reset(&zACont);

    /* free artifact filename string */
    free(zAFName);
  }
  /* close query statement */
  db_finalize(&q);
  /* free filename format string */
  free(zAFileOutFormat);
}







<
<
|


















|


<
<
<
<
<


|






>
>
>
>
>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
|
|
>
>
>
|
<
|
>
|
<
<
<
|
<
<
|
|
|
<
|
|
<
<
|
<
<

>
|

527
528
529
530
531
532
533


534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555





556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604

605
606
607



608


609
610
611

612
613


614


615
616
617
618
** AABBBBBBBBB.. is the 40 character artifact ID, AA the first 2 characters.
** If -L|--prefixlength is given, the length (default 2) of the directory
** prefix can be set to 0,1,..,9 characters.
*/
void deconstruct_cmd(void){
  const char *zDestDir;      /* Destination directory (last argument) */
  const char *zPrefixOpt;    /* Value of -L|--prefixlength, or NULL */
  char       *zEscDir;       /* Copy of zDestDir with every '%' doubled */
  int         nDir;          /* Number of bytes in zDestDir */
  int         nPct;          /* Number of '%' characters in zDestDir */
  int         i, j;          /* Read and write indexes for the escape copy */
  Stmt        s;

  /* check number of arguments */
  if( (g.argc != 3) && (g.argc != 5)  && (g.argc != 7)){
    usage ("?-R|--repository REPOSITORY? ?-L|--prefixlength N? DESTINATION");
  }
  /* get and check argument destination directory */
  zDestDir = g.argv[g.argc-1];
  if( !*zDestDir  || !file_isdir(zDestDir)) {
    fossil_fatal("DESTINATION(%s) is not a directory!",zDestDir);
  }
  /* get and check prefix length argument; only a single decimal digit
  ** is accepted, the default is 2 */
  zPrefixOpt=find_option("prefixlength","L",1);
  if( !zPrefixOpt ){
    prefixLength = 2;
  }else{
    if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
      prefixLength = (int)(*zPrefixOpt-'0');
    }else{
      fossil_fatal("N(%s) is not a valid prefix length!",zPrefixOpt);
    }
  }
#ifndef _WIN32
  if( access(zDestDir, W_OK) ){
    fossil_fatal("DESTINATION(%s) is not writeable!",zDestDir);
  }
#else
  /* write access on windows is not checked, errors will be
  ** detected on blob_write_to_file
  */
#endif

  /* The destination directory is embedded in zFNameFormat, which
  ** rebuild_step() later hands to mprintf() as a format string with
  ** exactly two arguments.  Double every '%' in the directory name so
  ** that it is emitted literally instead of being read as a conversion.
  */
  nDir = 0;
  nPct = 0;
  while( zDestDir[nDir] ){
    if( zDestDir[nDir]=='%' ) nPct++;
    nDir++;
  }
  zEscDir = malloc( nDir + nPct + 1 );
  if( zEscDir==0 ){
    fossil_fatal("out of memory");
  }
  for(i=j=0; i<nDir; i++){
    if( zDestDir[i]=='%' ) zEscDir[j++] = '%';
    zEscDir[j++] = zDestDir[i];
  }
  zEscDir[j] = 0;
  if( prefixLength ){
    /* DESTINATION / first prefixLength chars of UUID / rest of UUID */
    zFNameFormat = mprintf("%s/%%.%ds/%%s",zEscDir,prefixLength);
  }else{
    /* No subdirectory: DESTINATION / full UUID */
    zFNameFormat = mprintf("%s/%%s",zEscDir);
  }
  free(zEscDir);

  /* open repository and reset the progress state used by
  ** rebuild_step_done() */
  db_find_and_open_repository(1);
  bag_init(&bagDone);
  ttyOutput = 1;
  processCnt = 0;
  if (!g.fQuiet) {
    printf("0 (0%%)...\r");
    fflush(stdout);
  }
  totalSize = db_int(0, "SELECT count(*) FROM blob");

  /* First pass: start from every non-shunned artifact that has no row
  ** in the delta table.  With zFNameFormat set, rebuild_step() writes
  ** the content to a file and recurses into the artifacts that are
  ** deltas off of it.
  */
  db_prepare(&s,
     "SELECT rid, size FROM blob /*scan*/"
     " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)"
     "   AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)"
  );
  while( db_step(&s)==SQLITE_ROW ){
    int rid = db_column_int(&s, 0);
    int size = db_column_int(&s, 1);
    if( size>=0 ){
      Blob content;
      content_get(rid, &content);
      rebuild_step(rid, size, &content);
    }
  }
  db_finalize(&s);

  /* Second pass: pick up any non-shunned artifact that the first pass
  ** did not record in bagDone.
  */
  db_prepare(&s,
     "SELECT rid, size FROM blob"
     " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)"
  );
  while( db_step(&s)==SQLITE_ROW ){
    int rid = db_column_int(&s, 0);
    int size = db_column_int(&s, 1);
    if( size>=0 ){
      if( !bag_find(&bagDone, rid) ){
        Blob content;
        content_get(rid, &content);
        rebuild_step(rid, size, &content);
      }
    }
  }
  db_finalize(&s);

  /* Terminate the carriage-return progress line */
  if(!g.fQuiet && ttyOutput ){
    printf("\n");
  }

  /* free filename format string; clearing the pointer puts
  ** rebuild_step() back into its "fossil rebuild" mode */
  free(zFNameFormat);
  zFNameFormat = 0;
}