Fossil

Check-in [1409fbe38c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance improvements to the "bag" object.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 1409fbe38c764e93ca606a195b572dfd70aec1b4
User & Date: drh 2009-08-27 14:04:39.000
Context
2009-08-27
15:00
Performance improvements on the compute_leavs() routine. There is opportunity for further improvement in this area. check-in: 6953ca813c user: drh tags: trunk
14:04
Performance improvements to the "bag" object. check-in: 1409fbe38c user: drh tags: trunk
2009-08-26
20:24
Layout changes to the "shun" control page. check-in: 50ab5c33e7 user: drh tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/bag.c.
30
31
32
33
34
35
36















37
38
39
40
41
42
43
#include <assert.h>


#if INTERFACE
/*
** An integer can appear in the bag at most once.
** Integers must be positive.















*/
struct Bag {
  int cnt;   /* Number of integers in the bag */
  int sz;    /* Number of slots in a[] */
  int used;  /* Number of used slots in a[] */
  int *a;    /* Hash table of integers that are in the bag */
};







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <assert.h>


#if INTERFACE
/*
** An integer can appear in the bag at most once.
** Integers must be positive.
**
** On a hash collision, search continues to the next slot in the array,
** looping back to the beginning of the array when we reach the end.
** The search stops when a match is found or upon encountering a 0 entry.
**
** When an entry is deleted, its value is changed to -1.
**
** Bag.cnt is the number of live entries in the table.  Bag.used is
** the number of live entries plus the number of deleted entries.  So
** Bag.used>=Bag.cnt.  We want to keep Bag.used-Bag.cnt as small as
** possible.
**
** The length of a search increases as the hash table fills up.  So
** the table is enlarged whenever Bag.used reaches half of Bag.sz.  That
** way, the expected collision length never exceeds 2.
*/
struct Bag {
  int cnt;   /* Number of integers in the bag */
  int sz;    /* Number of slots in a[] */
  int used;  /* Number of used slots in a[] */
  int *a;    /* Hash table of integers that are in the bag */
};
62
63
64
65
66
67
68




69
70
71
72


73
74
75
76
77
78
79
80
81
82
83
84
85
86
87



88
89


90
91
92
93
94
95
96
97
98
99
100
101
102
103

104
105
106
107
108
109
110

111
112
113
114
115
116
117
118
119
120
** The hash function
*/
#define bag_hash(i)  (i*101)

/*
** Change the size of the hash table on a bag so that
** it contains N slots




*/
static void bag_resize(Bag *p, int newSize){
  int i;
  Bag old;



  old = *p;
  assert( newSize>old.cnt );
  p->a = malloc( sizeof(p->a[0])*newSize );
  p->sz = newSize;
  memset(p->a, 0, sizeof(p->a[0])*newSize );
  for(i=0; i<old.sz; i++){
    int e = old.a[i];
    if( e>0 ){
      unsigned h = bag_hash(e)%newSize;
      while( p->a[h] ){
        h++;
        if( h==newSize ) h = 0;
      }
      p->a[h] = e;



    }
  }


  p->used = p->cnt;
  bag_clear(&old);
}

/*
** Insert element e into the bag if it is not there already.
** Return TRUE if the insert actually occurred.  Return FALSE
** if the element was already in the bag.
*/
int bag_insert(Bag *p, int e){
  unsigned h;
  int rc = 0;
  assert( e>0 );
  if( p->used+1 >= p->sz/2 ){

    bag_resize(p,  p->cnt*2 + 20 );
  }
  h = bag_hash(e)%p->sz;
  while( p->a[h] && p->a[h]!=e ){
    h++;
    if( h>=p->sz ) h = 0;
  }

  if( p->a[h]==0 ){
    p->a[h] = e;
    p->used++;
    p->cnt++;
    rc = 1;
  }
  return rc;
}

/*







>
>
>
>




>
>















>
>
>


>
>














>
|


|



>
|

<







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

141
142
143
144
145
146
147
** The hash function
*/
#define bag_hash(i)  (i*101)

/*
** Change the size of the hash table on a bag so that
** it contains N slots
**
** Completely reconstruct the hash table from scratch.  Deleted
** entries (indicated by a -1) are removed.  When finished, it 
** should be the case that p->cnt==p->used.
*/
static void bag_resize(Bag *p, int newSize){
  int i;
  Bag old;
  int nDel = 0;   /* Number of deleted entries */
  int nLive = 0;  /* Number of live entries */

  old = *p;
  assert( newSize>old.cnt );
  p->a = malloc( sizeof(p->a[0])*newSize );
  p->sz = newSize;
  memset(p->a, 0, sizeof(p->a[0])*newSize );
  for(i=0; i<old.sz; i++){
    int e = old.a[i];
    if( e>0 ){
      unsigned h = bag_hash(e)%newSize;
      while( p->a[h] ){
        h++;
        if( h==newSize ) h = 0;
      }
      p->a[h] = e;
      nLive++;
    }else if( e<0 ){
      nDel++;
    }
  }
  assert( p->cnt == nLive );
  assert( p->used == nLive+nDel );
  p->used = p->cnt;
  bag_clear(&old);
}

/*
** Insert element e into the bag if it is not there already.
** Return TRUE if the insert actually occurred.  Return FALSE
** if the element was already in the bag.
*/
int bag_insert(Bag *p, int e){
  unsigned h;
  int rc = 0;
  assert( e>0 );
  if( p->used+1 >= p->sz/2 ){
    int n = p->sz*2;
    bag_resize(p,  n + 20 );
  }
  h = bag_hash(e)%p->sz;
  while( p->a[h]>0 && p->a[h]!=e ){
    h++;
    if( h>=p->sz ) h = 0;
  }
  if( p->a[h]<=0 ){
    if( p->a[h]==0 ) p->used++;
    p->a[h] = e;

    p->cnt++;
    rc = 1;
  }
  return rc;
}

/*
144
145
146
147
148
149
150






151

152



153
154
155
156
157
158
159
160
  if( p->sz==0 ) return;
  h = bag_hash(e)%p->sz;
  while( p->a[h] && p->a[h]!=e ){
    h++;
    if( h>=p->sz ) h = 0;
  }
  if( p->a[h] ){






    p->a[h] = -1;

    p->cnt--;



    if( p->sz>20 && p->cnt<p->sz/8 ){
      bag_resize(p, p->sz/2);
    }
  }
}

/*
** Return the first element in the bag.  Return 0 if the bag







>
>
>
>
>
>
|
>

>
>
>
|







171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
  if( p->sz==0 ) return;
  h = bag_hash(e)%p->sz;
  while( p->a[h] && p->a[h]!=e ){
    h++;
    if( h>=p->sz ) h = 0;
  }
  if( p->a[h] ){
    int nx = h+1;
    if( nx>=p->sz ) nx = 0;
    if( p->a[nx]==0 ){
      p->a[h] = 0;
      p->used--;
    }else{
      p->a[h] = -1;
    }
    p->cnt--;
    if( p->cnt==0 ){
      memset(p->a, 0, p->sz*sizeof(p->a[0]));
      p->used = 0;
    }else if( p->sz>40 && p->cnt<p->sz/8 ){
      bag_resize(p, p->sz/2);
    }
  }
}

/*
** Return the first element in the bag.  Return 0 if the bag