Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Performance improvements to the "bag" object. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
1409fbe38c764e93ca606a195b572dfd |
| User & Date: | drh 2009-08-27 14:04:39.000 |
Context
|
2009-08-27
| ||
| 15:00 | Performance improvements on the compute_leaves() routine. There is opportunity for further improvement in this area. check-in: 6953ca813c user: drh tags: trunk | |
| 14:04 | Performance improvements to the "bag" object. check-in: 1409fbe38c user: drh tags: trunk | |
|
2009-08-26
| ||
| 20:24 | Layout changes to the "shun" control page. check-in: 50ab5c33e7 user: drh tags: trunk | |
Changes
Changes to src/bag.c.
| ︙ | ︙ | |||
30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
#include <assert.h>
#if INTERFACE
/*
** An integer can appear in the bag at most once.
** Integers must be positive.
*/
struct Bag {
int cnt; /* Number of integers currently stored in the bag */
int sz; /* Number of slots allocated in a[] */
int used; /* Number of used slots in a[]; reset to cnt when the table is rebuilt */
int *a; /* Hash table of integers that are in the bag; 0 marks an empty slot */
};
| > > > > > > > > > > > > > > > | 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
#include <assert.h>
#if INTERFACE
/*
** An integer can appear in the bag at most once.
** Integers must be positive.
**
** On a hash collision, search continues to the next slot in the array,
** looping back to the beginning of the array when we reach the end.
** The search stops when a match is found or upon encountering a 0 entry.
**
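** When an entry is deleted, its value is changed to -1 so that searches
** continue past it. As a special case, if the slot that follows is empty
** the deleted slot is simply reset to 0, since no search can extend
** beyond it.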
**
** Bag.cnt is the number of live entries in the table. Bag.used is
** the number of live entries plus the number of deleted entries. So
** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as
** possible.
**
** The length of a search increases as the hash table fills up. So
** the table is enlarged whenever Bag.used reaches half of Bag.sz. That
** way, the expected collision length never exceeds 2.
*/
struct Bag {
int cnt; /* Number of live integers in the bag */
int sz; /* Number of slots in a[] */
int used; /* Number of non-zero slots in a[]: live entries plus deleted (-1) entries */
int *a; /* Hash table: >0 is a live entry, 0 is empty, -1 is a deleted entry */
};
|
| ︙ | ︙ | |||
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
** The hash function
*/
/*
** The multiply is done in unsigned arithmetic so that large values of i
** wrap around instead of overflowing a signed int, and the argument is
** parenthesized so that an expression may safely be passed.
*/
#define bag_hash(i) ((unsigned)(i)*101u)
/*
** Change the size of the hash table on a bag so that
** it contains newSize slots.
**
** Every positive entry of the old table is re-inserted into a freshly
** zeroed table.  newSize must be larger than the number of entries so
** that at least one empty slot remains to terminate a probe sequence.
**
** Running out of memory is treated as fatal: the process is aborted
** rather than continuing with a NULL table.
*/
static void bag_resize(Bag *p, int newSize){
  int i;
  int *aNew;
  Bag old;

  old = *p;
  assert( newSize>old.cnt );
  /* calloc() zeroes the new table and guards the size multiplication. */
  aNew = calloc(newSize, sizeof(aNew[0]));
  if( aNew==0 ) abort();
  p->a = aNew;
  p->sz = newSize;
  for(i=0; i<old.sz; i++){
    int e = old.a[i];
    if( e>0 ){
      unsigned h = bag_hash(e)%newSize;
      /* Linear probe, wrapping at the end, until an empty slot is found */
      while( p->a[h] ){
        h++;
        if( h==(unsigned)newSize ) h = 0;
      }
      p->a[h] = e;
    }
  }
  p->used = p->cnt;
  bag_clear(&old);
}
/*
** Insert element e into the bag if it is not there already.
** Return TRUE if the insert actually occurred. Return FALSE
** if the element was already in the bag.
*/
int bag_insert(Bag *p, int e){
unsigned h;
int rc = 0;
assert( e>0 );
if( p->used+1 >= p->sz/2 ){
| > > > > > > > > > > > > | | > | < | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
** The hash function
*/
/*
** The multiply is done in unsigned arithmetic so that large values of i
** wrap around instead of overflowing a signed int, and the argument is
** parenthesized so that an expression may safely be passed.
*/
#define bag_hash(i) ((unsigned)(i)*101u)
/*
** Change the size of the hash table on a bag so that
** it contains newSize slots.
**
** Completely reconstruct the hash table from scratch.  Deleted
** entries (indicated by a -1) are removed.  When finished, it
** should be the case that p->cnt==p->used.
**
** newSize must be larger than the number of live entries so that at
** least one empty slot remains to terminate a probe sequence.
**
** Running out of memory is treated as fatal: the process is aborted
** rather than continuing with a NULL table.
*/
static void bag_resize(Bag *p, int newSize){
  int i;
  int *aNew;
  Bag old;
  int nDel = 0;         /* Number of deleted entries */
  int nLive = 0;        /* Number of live entries */

  old = *p;
  assert( newSize>old.cnt );
  /* calloc() zeroes the new table and guards the size multiplication. */
  aNew = calloc(newSize, sizeof(aNew[0]));
  if( aNew==0 ) abort();
  p->a = aNew;
  p->sz = newSize;
  for(i=0; i<old.sz; i++){
    int e = old.a[i];
    if( e>0 ){
      unsigned h = bag_hash(e)%newSize;
      /* Linear probe, wrapping at the end, until an empty slot is found */
      while( p->a[h] ){
        h++;
        if( h==(unsigned)newSize ) h = 0;
      }
      p->a[h] = e;
      nLive++;
    }else if( e<0 ){
      nDel++;
    }
  }
  /* p->cnt and p->used still hold the pre-rebuild counts at this point */
  assert( p->cnt == nLive );
  assert( p->used == nLive+nDel );
  p->used = p->cnt;
  bag_clear(&old);
}
/*
** Insert element e into the bag if it is not there already.
** Return TRUE if the insert actually occurred.  Return FALSE
** if the element was already in the bag.
**
** e must be positive.
**
** The probe sequence is followed all the way to the first empty (0)
** slot, skipping over deleted (-1) slots, so that an existing copy of
** e that lies beyond a deleted slot is still detected.  Only when e is
** known to be absent is it stored, preferring the first deleted slot
** seen (which does not change Bag.used) over the empty slot.
*/
int bag_insert(Bag *p, int e){
  unsigned h;
  int iDel = -1;        /* Index of first deleted slot on the probe path */

  assert( e>0 );
  if( p->used+1 >= p->sz/2 ){
    int n = p->sz*2;
    bag_resize(p, n + 20 );
  }
  h = bag_hash(e)%p->sz;
  /* Bag.used stays below Bag.sz/2 so an empty slot is always reachable. */
  while( p->a[h]!=0 ){
    if( p->a[h]==e ) return 0;
    if( p->a[h]<0 && iDel<0 ) iDel = (int)h;
    h++;
    if( h>=(unsigned)p->sz ) h = 0;
  }
  if( iDel>=0 ){
    /* Reuse a deleted slot; it is already counted in Bag.used */
    p->a[iDel] = e;
  }else{
    p->a[h] = e;
    p->used++;
  }
  p->cnt++;
  return 1;
}
/*
|
| ︙ | ︙ | |||
144 145 146 147 148 149 150 |
if( p->sz==0 ) return;
h = bag_hash(e)%p->sz;
while( p->a[h] && p->a[h]!=e ){
h++;
if( h>=p->sz ) h = 0;
}
if( p->a[h] ){
| > > > > > > | > > > > | | 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
if( p->sz==0 ) return;
h = bag_hash(e)%p->sz;
while( p->a[h] && p->a[h]!=e ){
h++;
if( h>=p->sz ) h = 0;
}
if( p->a[h] ){
int nx = h+1;
if( nx>=p->sz ) nx = 0;
if( p->a[nx]==0 ){
p->a[h] = 0;
p->used--;
}else{
p->a[h] = -1;
}
p->cnt--;
if( p->cnt==0 ){
memset(p->a, 0, p->sz*sizeof(p->a[0]));
p->used = 0;
}else if( p->sz>40 && p->cnt<p->sz/8 ){
bag_resize(p, p->sz/2);
}
}
}
/*
** Return the first element in the bag. Return 0 if the bag
|
| ︙ | ︙ |