[ruby-dev:31729] packed st_table

From: Nobuyoshi Nakada <nobu@...>
Date: 2007-09-04 08:43:49 UTC
List: ruby-dev #31729
なかだです。

田中さんに「Hashの順序はどっちでもいいからこっちをやれ」といわれ
たので、st_tableのentries_packedを一般化してみました。


Index: st.c
===================================================================
--- st.c	(revision 13339)
+++ st.c	(working copy)
@@ -61,5 +61,4 @@ static void rehash(st_table *);
 
 #define alloc(type) (type*)malloc((size_t)sizeof(type))
-#define Calloc(n,s) (char*)calloc((n),(s))
 
 #define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
@@ -77,5 +76,5 @@ static void rehash(st_table *);
 Table of prime numbers 2^n+a, 2<=n<=30.
 */
-static long primes[] = {
+static const unsigned int primes[] = {
 	8 + 3,
 	16 + 3,
@@ -110,5 +109,5 @@ static long primes[] = {
 
 static int
-new_size(int size)
+new_size(unsigned int size)
 {
     int i;
@@ -120,8 +119,8 @@ new_size(int size)
     return -1;
 #else
-    int newsize;
+    unsigned int newsize;
 
     for (i = 0, newsize = MINSIZE;
-	 i < (int )(sizeof(primes)/sizeof(primes[0]));
+	 i < (int)(sizeof(primes)/sizeof(primes[0])) - 1;
 	 i++, newsize <<= 1)
     {
@@ -146,5 +145,12 @@ stat_col()
 #endif
 
-#define MAX_PACKED_NUMHASH 5
+#define PACKED_RATIO(table) ((table)->type == &type_numhash ? 2 : 3)
+#define PACKED_COUNT(table, n) ((n) * PACKED_RATIO(table))
+#define PACKABLE(table, n) \
+    ((n) * PACKED_RATIO(table) <= ST_DEFAULT_INIT_TABLE_SIZE)
+
+#define PACKED_EQUAL(table, i, r, hash_val, key)\
+    (((r) < 3 || (st_data_t)(table)->bins[(i)+2] == (hash_val)) &&\
+     EQUAL((table), (st_data_t)(table)->bins[i], (key)))
 
 st_table*
@@ -161,11 +167,12 @@ st_init_table_with_size(const struct st_
 
     size = new_size(size);	/* round up to prime number */
+    if (size == -1) return 0;
 
     tbl = alloc(st_table);
     tbl->type = type;
     tbl->num_entries = 0;
-    tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH;
-    tbl->num_bins = size;
-    tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+    tbl->entries_packed = 1;
+    tbl->num_bins = 0;
+    tbl->bins = calloc(size, sizeof(st_table_entry*));
     tbl->head = 0;
 
@@ -207,8 +214,9 @@ st_clear(st_table *table)
 {
     register st_table_entry *ptr, *next;
-    int i;
+    unsigned int i;
 
     if (table->entries_packed) {
         table->num_entries = 0;
+        table->num_bins = 0;
         return;
     }
@@ -263,10 +271,12 @@ st_lookup(st_table *table, register st_d
 
     if (table->entries_packed) {
-        int i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (value !=0) *value = (st_data_t)table->bins[i*2+1];
-                return 1;
-            }
+        unsigned int i, hash_val;
+	const unsigned int r = PACKED_RATIO(table);
+	hash_val = r > 2 ? do_hash(key, table) : key;
+	for (i = 0; i < table->num_bins; i += r) {
+	    if (PACKED_EQUAL(table, i, r, hash_val, key)) {
+		if (value != 0) *value = (st_data_t)table->bins[i+1];
+		return 1;
+	    }
         }
         return 0;
@@ -314,17 +324,45 @@ static void
 unpack_entries(register st_table *table)
 {
-    int i;
-    struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2];
-    int num_entries = table->num_entries;
+    struct st_table_entry *packed_bins[ST_DEFAULT_INIT_TABLE_SIZE];
+    unsigned int i, num_bins = table->num_bins;
+    const unsigned int r = PACKED_RATIO(table);
 
-    memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * num_entries*2);
+    memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * num_bins);
     table->entries_packed = 0;
     table->num_entries = 0;
+    table->num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
     memset(table->bins, 0, sizeof(struct st_table_entry *) * table->num_bins);
-    for (i = 0; i < num_entries; i++) {
-        st_insert(table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]);
+    for (i = 0; i < num_bins; i += r) {
+        st_data_t key = (st_data_t)packed_bins[i];
+	st_data_t value = (st_data_t)packed_bins[i+1];
+	unsigned int hash_val, bin_pos;
+
+	hash_val = r > 2 ? (unsigned int)packed_bins[i+2] : key;
+        bin_pos = hash_val % table->num_bins;
+	ADD_DIRECT(table, key, value, hash_val, bin_pos);
     }
 }
 
+static int
+add_direct_packed(register st_table *table, const unsigned int r,
+		  st_data_t key, st_data_t value, unsigned int hash_val)
+{
+    unsigned int i;
+
+    if (table->num_bins >= ST_DEFAULT_INIT_TABLE_SIZE / r * r) {
+	table->bins = xrealloc(table->bins, sizeof(struct st_table_entry *) * (table->num_bins+r));
+    }
+    else if (!PACKABLE(table, table->num_bins+r)) {
+	return 0;
+    }
+    i = table->num_bins;
+    table->num_bins += r;
+    table->num_entries++;
+    table->bins[i] = (struct st_table_entry*)key;
+    table->bins[i+1] = (struct st_table_entry*)value;
+    if (r > 2) table->bins[i+2] = (struct st_table_entry*)hash_val;
+    return 1;
+}
+
 int
 st_insert(register st_table *table, register st_data_t key, st_data_t value)
@@ -334,20 +372,17 @@ st_insert(register st_table *table, regi
 
     if (table->entries_packed) {
-        int i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
+        unsigned int i, hash_val;
+	const unsigned int r = PACKED_RATIO(table);
+	hash_val = r > 2 ? do_hash(key, table) : key;
+        for (i = 0; i < table->num_bins; i += r) {
+	    if (PACKED_EQUAL(table, i, r, hash_val, key)) {
+                table->bins[i+1] = (struct st_table_entry*)value;
                 return 1;
             }
         }
-        if ((table->num_entries+1) * 2 <= table->num_bins && table->num_entries+1 <= MAX_PACKED_NUMHASH) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
+	if (add_direct_packed(table, r, key, value, hash_val)) {
             return 0;
         }
-        else {
-            unpack_entries(table);
-        }
+	unpack_entries(table);
     }
 
@@ -371,14 +406,11 @@ st_add_direct(st_table *table, st_data_t
 
     if (table->entries_packed) {
-        int i;
-        if ((table->num_entries+1) * 2 <= table->num_bins && table->num_entries+1 <= MAX_PACKED_NUMHASH) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
+        unsigned int hash_val;
+	const unsigned int r = PACKED_RATIO(table);
+	hash_val = r > 2 ? do_hash(key, table) : key;
+	if (add_direct_packed(table, r, key, value, hash_val)) {
             return;
         }
-        else {
-            unpack_entries(table);
-        }
+	unpack_entries(table);
     }
 
@@ -392,10 +424,11 @@ rehash(register st_table *table)
 {
     register st_table_entry *ptr, **new_bins;
-    int i, new_num_bins;
+    unsigned int i, new_num_bins;
     unsigned int hash_val;
+    int size = new_size(table->num_bins+1);
 
-    new_num_bins = new_size(table->num_bins+1);
-    new_bins = (st_table_entry**)
-	xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
+    if (size == -1) return;
+    new_num_bins = (unsigned int)size;
+    new_bins = xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
     for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
     table->num_bins = new_num_bins;
@@ -416,5 +449,5 @@ st_copy(st_table *old_table)
     st_table *new_table;
     st_table_entry *ptr, *entry, *prev, **tail;
-    int num_bins = old_table->num_bins;
+    unsigned int num_bins = old_table->num_bins;
     unsigned int hash_val;
 
@@ -425,6 +458,5 @@ st_copy(st_table *old_table)
 
     *new_table = *old_table;
-    new_table->bins = (st_table_entry**)
-	Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+    new_table->bins = calloc(num_bins, sizeof(st_table_entry*));
 
     if (new_table->bins == 0) {
@@ -485,11 +517,14 @@ st_delete(register st_table *table, regi
 
     if (table->entries_packed) {
-        int i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == *key) {
-                if (value != 0) *value = (st_data_t)table->bins[i*2+1];
+        unsigned int i;
+	const unsigned int r = PACKED_RATIO(table);
+	hash_val = r > 2 ? do_hash(*key, table) : 0;
+        for (i = 0; i < table->num_bins; i += r) {
+            if (PACKED_EQUAL(table, i, r, hash_val, *key)) {
+                if (value != 0) *value = (st_data_t)table->bins[i+1];
                 table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+		table->num_bins -= r;
+                memmove(&table->bins[i], &table->bins[i+r],
+                        sizeof(struct st_table_entry*) * (table->num_bins-i));
                 return 1;
             }
@@ -522,4 +557,20 @@ st_delete_safe(register st_table *table,
     register st_table_entry *ptr;
 
+    if (table->entries_packed) {
+        unsigned int i;
+	const unsigned int r = PACKED_RATIO(table);
+	hash_val = r > 2 ? do_hash(*key, table) : 0;
+        for (i = 0; i < table->num_bins; i += r) {
+            if (PACKED_EQUAL(table, i, r, hash_val, *key)) {
+                if (value != 0) *value = (st_data_t)table->bins[i+1];
+		table->num_entries--;
+                table->bins[i] = table->bins[i+1] = (struct st_table_entry *)never;
+                return 1;
+            }
+        }
+        if (value != 0) *value = 0;
+        return 0;
+    }
+
     hash_val = do_hash_bin(*key, table);
     ptr = table->bins[hash_val];
@@ -543,5 +594,18 @@ st_cleanup_safe(st_table *table, st_data
 {
     st_table_entry *ptr, **last, *tmp;
-    int i;
+    unsigned int i;
+
+    if (table->entries_packed) {
+        unsigned int j;
+	const unsigned int r = PACKED_RATIO(table);
+        for (i = j = 0; i < table->num_bins; i += r) {
+            if ((st_data_t)table->bins[i] != never && i > j) {
+		memcpy(&table->bins[j], table->bins[i],
+		       sizeof(st_table_entry *) * r);
+		j++;
+            }
+        }
+        return;
+    }
 
     for (i = 0; i < table->num_bins; i++) {
@@ -565,24 +629,16 @@ st_foreach(st_table *table, int (*func)(
     st_table_entry *ptr, **last, *tmp;
     enum st_retval retval;
-    int i, end;
+    unsigned int i;
+    int end;
 
     if (table->entries_packed) {
-        for (i = 0; i < table->num_entries; i++) {
-            int j;
+	const unsigned int r = PACKED_RATIO(table);
+        for (i = 0; i < table->num_bins; i += r) {
             st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
+            key = (st_data_t)table->bins[i];
+            val = (st_data_t)table->bins[i+1];
             retval = (*func)(key, val, arg);
             switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
-                for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
-                        break;
-                }
-                if (j == table->num_entries) {
-                    /* call func with error notice */
-                    retval = (*func)(0, 0, arg, 1);
-                    return 1;
-                }
 		/* fall through */
 	      case ST_CONTINUE:
@@ -592,7 +648,7 @@ st_foreach(st_table *table, int (*func)(
 	      case ST_DELETE:
                 table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                i--;
+		if (i >= (table->num_bins -= r)) return 0;
+		memmove(&table->bins[i], &table->bins[i+1],
+			sizeof(struct st_table_entry*) * (table->num_bins-i));
                 break;
             }
@@ -645,24 +701,17 @@ st_reverse_foreach(st_table *table, int 
     st_table_entry *ptr, **last, *tmp;
     enum st_retval retval;
-    int i, end;
+    unsigned int i;
+    int end;
 
     if (table->entries_packed) {
-        for (i = table->num_entries-1; 0 <= i; i--) {
-            int j;
+	const unsigned int r = PACKED_RATIO(table);
+        for (i = table->num_bins; i > 0;) {
             st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
+	    i -= r;
+            key = (st_data_t)table->bins[i];
+            val = (st_data_t)table->bins[i+1];
             retval = (*func)(key, val, arg);
             switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
-                for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
-                        break;
-                }
-                if (j == table->num_entries) {
-                    /* call func with error notice */
-                    retval = (*func)(0, 0, arg, 1);
-                    return 1;
-                }
 		/* fall through */
 	      case ST_CONTINUE:
@@ -672,6 +721,7 @@ st_reverse_foreach(st_table *table, int 
 	      case ST_DELETE:
                 table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+		if (i >= (table->num_bins -= r)) break;
+                memmove(&table->bins[i], &table->bins[i+1],
+                        sizeof(struct st_table_entry*) * (table->num_bins-i));
                 break;
             }


-- 
--- 僕の前にBugはない。
--- 僕の後ろにBugはできる。
    中田 伸悦

In This Thread

Prev Next