[ruby-dev:31729] packed st_table
From:
Nobuyoshi Nakada <nobu@...>
Date:
2007-09-04 08:43:49 UTC
List:
ruby-dev #31729
なかだです。
田中さんに「Hashの順序はどっちでもいいからこっちをやれ」といわれ
たので、st_tableのentries_packedを一般化してみました。
Index: st.c
===================================================================
--- st.c (revision 13339)
+++ st.c (working copy)
@@ -61,5 +61,4 @@ static void rehash(st_table *);
#define alloc(type) (type*)malloc((size_t)sizeof(type))
-#define Calloc(n,s) (char*)calloc((n),(s))
#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
@@ -77,5 +76,5 @@ static void rehash(st_table *);
Table of prime numbers 2^n+a, 2<=n<=30.
*/
-static long primes[] = {
+static const unsigned int primes[] = {
8 + 3,
16 + 3,
@@ -110,5 +109,5 @@ static long primes[] = {
static int
-new_size(int size)
+new_size(unsigned int size)
{
int i;
@@ -120,8 +119,8 @@ new_size(int size)
return -1;
#else
- int newsize;
+ unsigned int newsize;
for (i = 0, newsize = MINSIZE;
- i < (int )(sizeof(primes)/sizeof(primes[0]));
+ i < (int)(sizeof(primes)/sizeof(primes[0])) - 1;
i++, newsize <<= 1)
{
@@ -146,5 +145,12 @@ stat_col()
#endif
-#define MAX_PACKED_NUMHASH 5
+#define PACKED_RATIO(table) ((table)->type == &type_numhash ? 2 : 3)
+#define PACKED_COUNT(table, n) ((n) * PACKED_RATIO(table))
+#define PACKABLE(table, n) \
+ ((n) * PACKED_RATIO(table) <= ST_DEFAULT_INIT_TABLE_SIZE)
+
+#define PACKED_EQUAL(table, i, r, hash_val, key)\
+ (((r) < 3 || (st_data_t)(table)->bins[(i)+2] == (hash_val)) &&\
+ EQUAL((table), (st_data_t)(table)->bins[i], (key)))
st_table*
@@ -161,11 +167,12 @@ st_init_table_with_size(const struct st_
size = new_size(size); /* round up to prime number */
+ if (size == -1) return 0;
tbl = alloc(st_table);
tbl->type = type;
tbl->num_entries = 0;
- tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH;
- tbl->num_bins = size;
- tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+ tbl->entries_packed = 1;
+ tbl->num_bins = 0;
+ tbl->bins = calloc(size, sizeof(st_table_entry*));
tbl->head = 0;
@@ -207,8 +214,9 @@ st_clear(st_table *table)
{
register st_table_entry *ptr, *next;
- int i;
+ unsigned int i;
if (table->entries_packed) {
table->num_entries = 0;
+ table->num_bins = 0;
return;
}
@@ -263,10 +271,12 @@ st_lookup(st_table *table, register st_d
if (table->entries_packed) {
- int i;
- for (i = 0; i < table->num_entries; i++) {
- if ((st_data_t)table->bins[i*2] == key) {
- if (value !=0) *value = (st_data_t)table->bins[i*2+1];
- return 1;
- }
+ unsigned int i, hash_val;
+ const unsigned int r = PACKED_RATIO(table);
+ hash_val = r > 2 ? do_hash(key, table) : key;
+ for (i = 0; i < table->num_bins; i += r) {
+ if (PACKED_EQUAL(table, i, r, hash_val, key)) {
+ if (value != 0) *value = (st_data_t)table->bins[i+1];
+ return 1;
+ }
}
return 0;
@@ -314,17 +324,45 @@ static void
unpack_entries(register st_table *table)
{
- int i;
- struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2];
- int num_entries = table->num_entries;
+ struct st_table_entry *packed_bins[ST_DEFAULT_INIT_TABLE_SIZE];
+ unsigned int i, num_bins = table->num_bins;
+ const unsigned int r = PACKED_RATIO(table);
- memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * num_entries*2);
+ memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * num_bins);
table->entries_packed = 0;
table->num_entries = 0;
+ table->num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
memset(table->bins, 0, sizeof(struct st_table_entry *) * table->num_bins);
- for (i = 0; i < num_entries; i++) {
- st_insert(table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]);
+ for (i = 0; i < num_bins; i += r) {
+ st_data_t key = (st_data_t)packed_bins[i];
+ st_data_t value = (st_data_t)packed_bins[i+1];
+ unsigned int hash_val, bin_pos;
+
+ hash_val = r > 2 ? (unsigned int)packed_bins[i+2] : key;
+ bin_pos = hash_val % table->num_bins;
+ ADD_DIRECT(table, key, value, hash_val, bin_pos);
}
}
+static int
+add_direct_packed(register st_table *table, const unsigned int r,
+ st_data_t key, st_data_t value, unsigned int hash_val)
+{
+ unsigned int i;
+
+ if (table->num_bins >= ST_DEFAULT_INIT_TABLE_SIZE / r * r) {
+ table->bins = xrealloc(table->bins, sizeof(struct st_table_entry *) * (table->num_bins+r));
+ }
+ else if (!PACKABLE(table, table->num_bins+r)) {
+ return 0;
+ }
+ i = table->num_bins;
+ table->num_bins += r;
+ table->num_entries++;
+ table->bins[i] = (struct st_table_entry*)key;
+ table->bins[i+1] = (struct st_table_entry*)value;
+ if (r > 2) table->bins[i+2] = (struct st_table_entry*)hash_val;
+ return 1;
+}
+
int
st_insert(register st_table *table, register st_data_t key, st_data_t value)
@@ -334,20 +372,17 @@ st_insert(register st_table *table, regi
if (table->entries_packed) {
- int i;
- for (i = 0; i < table->num_entries; i++) {
- if ((st_data_t)table->bins[i*2] == key) {
- table->bins[i*2+1] = (struct st_table_entry*)value;
+ unsigned int i, hash_val;
+ const unsigned int r = PACKED_RATIO(table);
+ hash_val = r > 2 ? do_hash(key, table) : key;
+ for (i = 0; i < table->num_bins; i += r) {
+ if (PACKED_EQUAL(table, i, r, hash_val, key)) {
+ table->bins[i+1] = (struct st_table_entry*)value;
return 1;
}
}
- if ((table->num_entries+1) * 2 <= table->num_bins && table->num_entries+1 <= MAX_PACKED_NUMHASH) {
- i = table->num_entries++;
- table->bins[i*2] = (struct st_table_entry*)key;
- table->bins[i*2+1] = (struct st_table_entry*)value;
+ if (add_direct_packed(table, r, key, value, hash_val)) {
return 0;
}
- else {
- unpack_entries(table);
- }
+ unpack_entries(table);
}
@@ -371,14 +406,11 @@ st_add_direct(st_table *table, st_data_t
if (table->entries_packed) {
- int i;
- if ((table->num_entries+1) * 2 <= table->num_bins && table->num_entries+1 <= MAX_PACKED_NUMHASH) {
- i = table->num_entries++;
- table->bins[i*2] = (struct st_table_entry*)key;
- table->bins[i*2+1] = (struct st_table_entry*)value;
+ unsigned int hash_val;
+ const unsigned int r = PACKED_RATIO(table);
+ hash_val = r > 2 ? do_hash(key, table) : key;
+ if (add_direct_packed(table, r, key, value, hash_val)) {
return;
}
- else {
- unpack_entries(table);
- }
+ unpack_entries(table);
}
@@ -392,10 +424,11 @@ rehash(register st_table *table)
{
register st_table_entry *ptr, **new_bins;
- int i, new_num_bins;
+ unsigned int i, new_num_bins;
unsigned int hash_val;
+ int size = new_size(table->num_bins+1);
- new_num_bins = new_size(table->num_bins+1);
- new_bins = (st_table_entry**)
- xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
+ if (size == -1) return;
+ new_num_bins = (unsigned int)size;
+ new_bins = xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
table->num_bins = new_num_bins;
@@ -416,5 +449,5 @@ st_copy(st_table *old_table)
st_table *new_table;
st_table_entry *ptr, *entry, *prev, **tail;
- int num_bins = old_table->num_bins;
+ unsigned int num_bins = old_table->num_bins;
unsigned int hash_val;
@@ -425,6 +458,5 @@ st_copy(st_table *old_table)
*new_table = *old_table;
- new_table->bins = (st_table_entry**)
- Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+ new_table->bins = calloc(num_bins, sizeof(st_table_entry*));
if (new_table->bins == 0) {
@@ -485,11 +517,14 @@ st_delete(register st_table *table, regi
if (table->entries_packed) {
- int i;
- for (i = 0; i < table->num_entries; i++) {
- if ((st_data_t)table->bins[i*2] == *key) {
- if (value != 0) *value = (st_data_t)table->bins[i*2+1];
+ unsigned int i;
+ const unsigned int r = PACKED_RATIO(table);
+ hash_val = r > 2 ? do_hash(*key, table) : 0;
+ for (i = 0; i < table->num_bins; i += r) {
+ if (PACKED_EQUAL(table, i, r, hash_val, *key)) {
+ if (value != 0) *value = (st_data_t)table->bins[i+1];
table->num_entries--;
- memmove(&table->bins[i*2], &table->bins[(i+1)*2],
- sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+ table->num_bins -= r;
+ memmove(&table->bins[i], &table->bins[i+r],
+ sizeof(struct st_table_entry*) * (table->num_bins-i));
return 1;
}
@@ -522,4 +557,20 @@ st_delete_safe(register st_table *table,
register st_table_entry *ptr;
+ if (table->entries_packed) {
+ unsigned int i;
+ const unsigned int r = PACKED_RATIO(table);
+ hash_val = r > 2 ? do_hash(*key, table) : 0;
+ for (i = 0; i < table->num_bins; i += r) {
+ if (PACKED_EQUAL(table, i, r, hash_val, *key)) {
+ if (value != 0) *value = (st_data_t)table->bins[i+1];
+ table->num_entries--;
+ table->bins[i] = table->bins[i+1] = (struct st_table_entry *)never;
+ return 1;
+ }
+ }
+ if (value != 0) *value = 0;
+ return 0;
+ }
+
hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];
@@ -543,5 +594,18 @@ st_cleanup_safe(st_table *table, st_data
{
st_table_entry *ptr, **last, *tmp;
- int i;
+ unsigned int i;
+
+ if (table->entries_packed) {
+ unsigned int j;
+ const unsigned int r = PACKED_RATIO(table);
+ for (i = j = 0; i < table->num_bins; i += r) {
+ if ((st_data_t)table->bins[i] != never && i > j) {
+ memcpy(&table->bins[j], table->bins[i],
+ sizeof(st_table_entry *) * r);
+ j++;
+ }
+ }
+ return;
+ }
for (i = 0; i < table->num_bins; i++) {
@@ -565,24 +629,16 @@ st_foreach(st_table *table, int (*func)(
st_table_entry *ptr, **last, *tmp;
enum st_retval retval;
- int i, end;
+ unsigned int i;
+ int end;
if (table->entries_packed) {
- for (i = 0; i < table->num_entries; i++) {
- int j;
+ const unsigned int r = PACKED_RATIO(table);
+ for (i = 0; i < table->num_bins; i += r) {
st_data_t key, val;
- key = (st_data_t)table->bins[i*2];
- val = (st_data_t)table->bins[i*2+1];
+ key = (st_data_t)table->bins[i];
+ val = (st_data_t)table->bins[i+1];
retval = (*func)(key, val, arg);
switch (retval) {
case ST_CHECK: /* check if hash is modified during iteration */
- for (j = 0; j < table->num_entries; j++) {
- if ((st_data_t)table->bins[j*2] == key)
- break;
- }
- if (j == table->num_entries) {
- /* call func with error notice */
- retval = (*func)(0, 0, arg, 1);
- return 1;
- }
/* fall through */
case ST_CONTINUE:
@@ -592,7 +648,7 @@ st_foreach(st_table *table, int (*func)(
case ST_DELETE:
table->num_entries--;
- memmove(&table->bins[i*2], &table->bins[(i+1)*2],
- sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
- i--;
+ if (i >= (table->num_bins -= r)) return 0;
+ memmove(&table->bins[i], &table->bins[i+1],
+ sizeof(struct st_table_entry*) * (table->num_bins-i));
break;
}
@@ -645,24 +701,17 @@ st_reverse_foreach(st_table *table, int
st_table_entry *ptr, **last, *tmp;
enum st_retval retval;
- int i, end;
+ unsigned int i;
+ int end;
if (table->entries_packed) {
- for (i = table->num_entries-1; 0 <= i; i--) {
- int j;
+ const unsigned int r = PACKED_RATIO(table);
+ for (i = table->num_bins; i > 0;) {
st_data_t key, val;
- key = (st_data_t)table->bins[i*2];
- val = (st_data_t)table->bins[i*2+1];
+ i -= r;
+ key = (st_data_t)table->bins[i];
+ val = (st_data_t)table->bins[i+1];
retval = (*func)(key, val, arg);
switch (retval) {
case ST_CHECK: /* check if hash is modified during iteration */
- for (j = 0; j < table->num_entries; j++) {
- if ((st_data_t)table->bins[j*2] == key)
- break;
- }
- if (j == table->num_entries) {
- /* call func with error notice */
- retval = (*func)(0, 0, arg, 1);
- return 1;
- }
/* fall through */
case ST_CONTINUE:
@@ -672,6 +721,7 @@ st_reverse_foreach(st_table *table, int
case ST_DELETE:
table->num_entries--;
- memmove(&table->bins[i*2], &table->bins[(i+1)*2],
- sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+ if (i >= (table->num_bins -= r)) break;
+ memmove(&table->bins[i], &table->bins[i+1],
+ sizeof(struct st_table_entry*) * (table->num_bins-i));
break;
}
--
--- 僕の前にBugはない。
--- 僕の後ろにBugはできる。
中田 伸悦