[ruby-dev:42522] Proposal: thread local freelist
From:
SASADA Koichi <ko1@...>
Date:
2010-11-03 13:50:25 UTC
List:
ruby-dev #42522
Sasada here. This is a proposal about object management.

Currently there is a single global freelist for object management; how
about giving each thread its own?

Under the current setup, the benefit is that allocating a new RVALUE
becomes thread-local, which is mildly pleasant: object locality in memory
might improve, and with it CPU cache efficiency might improve, but that
is largely a matter of feeling. newobj only gains one extra level of
indirection, so I don't think that is much of a problem.

# Even now, threads do not flip back and forth rapidly, so there is
# presumably already a fair amount of locality.
# This proposal is about raising that locality explicitly.

The essential benefit is for when we want to do parallel work in this
area: a global structure like the freelist makes that very awkward, and
this proposal removes it. Yesterday I deleted the part called
VALUE_CACHE; I had originally added it in the parallel-thread Ruby I was
building, to eliminate lock contention on the freelist, but what I really
wanted to build was a mechanism like the one proposed here (it was a
hassle, though, so at the time I made do with VALUE_CACHE).

As for the implementation, I am thinking along the following lines
(sketched in code below):

- give each slot its own freelist
- each thread holds one slot
- on newobj:
  - take an obj from the held slot
  - if there is none, claim another unclaimed slot
  - if there is no unclaimed slot, run GC
- at GC time, make each thread release the slot it was holding
  (so a given slot does not become dedicated to a particular thread)

I am attaching the patch I put together for now, but test-all is showing
some unpleasant-looking errors, so it probably contains bugs. I believe
it also copes with lazy sweep. Probably.

--
// SASADA Koichi at atdot dot net
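As a standalone illustration of the allocation path described in the list
above, here is a minimal sketch. The types freecell_t, slot_t, thread_t
and the helpers next_swept_slot()/run_gc() are simplified stand-ins
invented for illustration, not the real gc.c types (RVALUE, struct
heaps_slot, rb_thread_t); the actual patch below does the equivalent in
get_sweptslot()/next_sweptslot(), with the extra bookkeeping that lazy
sweep requires.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for gc.c's free RVALUE cells, struct heaps_slot
   and rb_thread_t; illustration-only types, not the real ones. */
typedef struct freecell {
    struct freecell *next;
} freecell_t;

typedef struct slot {
    freecell_t *freelist;        /* per-slot freelist, as in the patch */
    struct slot *next;
} slot_t;

typedef struct thread {
    slot_t *heap_slot;           /* the slot this thread currently holds */
} thread_t;

/* Swept slots that may still contain free cells
   (objspace->heap.swept_slots plays this role in the patch). */
static slot_t *swept_slots;

/* Advance to the next swept slot with a non-empty freelist, or NULL. */
static slot_t *
next_swept_slot(void)
{
    while (swept_slots && !swept_slots->freelist)
        swept_slots = swept_slots->next;
    return swept_slots;
}

/* Placeholder: the real code lazy-sweeps / collects here and refills
   slot freelists; returning 0 means nothing could be reclaimed. */
static int
run_gc(void)
{
    return 0;
}

/* Allocation path following the list in the mail:
   1. pop from the held slot's freelist,
   2. otherwise claim another swept slot that still has free cells,
   3. otherwise GC and retry (here: fail hard when GC yields nothing). */
static void *
newobj(thread_t *th)
{
    slot_t *slot = th->heap_slot;

    while (!(slot && slot->freelist)) {
        slot = next_swept_slot();
        if (slot) {              /* freelist guaranteed non-empty */
            th->heap_slot = slot;
            break;
        }
        if (!run_gc()) {
            fputs("out of memory\n", stderr);
            exit(EXIT_FAILURE);
        }
    }

    freecell_t *obj = slot->freelist;   /* pop one free cell */
    slot->freelist = obj->next;
    return obj;
}

/* At GC time each thread drops its held slot, so a slot never becomes
   dedicated to one thread (the patch clears th->heap_slot in
   rb_thread_mark for the same reason). */
static void
release_slot(thread_t *th)
{
    th->heap_slot = NULL;
}

int
main(void)
{
    /* One slot with three chained free cells. */
    static freecell_t cells[3];
    static slot_t slot;
    cells[0].next = &cells[1];
    cells[1].next = &cells[2];
    slot.freelist = &cells[0];
    swept_slots = &slot;

    thread_t th = { NULL };
    printf("obj: %p\n", newobj(&th));
    printf("obj: %p\n", newobj(&th));
    release_slot(&th);
    printf("obj: %p\n", newobj(&th));   /* re-claims the same slot */
    return 0;
}

One detail the sketch glosses over: in the patch, next_sweptslot() only
walks swept_slots up to sweep_slots, the lazy-sweep frontier, so newobj
never hands out cells from a slot that has not been swept yet.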
Attachments (1)
gc.tl.patch
(11.7 KB, text/x-diff)
Index: vm_core.h
===================================================================
--- vm_core.h (revision 29675)
+++ vm_core.h (working copy)
@@ -453,6 +453,7 @@ typedef struct rb_thread_struct {
#endif
jmp_buf machine_regs;
int mark_stack_len;
+ struct heaps_slot *heap_slot;
/* statistics data for profiler */
VALUE stat_insn_usage;
Index: gc.c
===================================================================
--- gc.c (revision 29675)
+++ gc.c (working copy)
@@ -286,6 +286,7 @@ typedef struct RVALUE {
struct heaps_slot {
void *membase;
RVALUE *slot;
+ RVALUE *freelist;
size_t limit;
struct heaps_slot *next;
struct heaps_slot *prev;
@@ -320,10 +321,10 @@ typedef struct rb_objspace {
size_t increment;
struct heaps_slot *ptr;
struct heaps_slot *sweep_slots;
+ struct heaps_slot *swept_slots;
struct sorted_heaps_slot *sorted;
size_t length;
size_t used;
- RVALUE *freelist;
RVALUE *range[2];
RVALUE *freed;
size_t live_num;
@@ -371,7 +372,6 @@ int *ruby_initial_gc_stress_ptr = &rb_ob
#define heaps objspace->heap.ptr
#define heaps_length objspace->heap.length
#define heaps_used objspace->heap.used
-#define freelist objspace->heap.freelist
#define lomem objspace->heap.range[0]
#define himem objspace->heap.range[1]
#define heaps_inc objspace->heap.increment
@@ -935,6 +935,7 @@ assign_heap_slot(rb_objspace_t *objspace
slot->next = heaps;
if (heaps) heaps->prev = slot;
heaps = slot;
+ objspace->heap.swept_slots = slot;
membase = p;
if ((VALUE)p % sizeof(RVALUE) != 0) {
@@ -977,8 +978,8 @@ assign_heap_slot(rb_objspace_t *objspace
while (p < pend) {
p->as.free.flags = 0;
- p->as.free.next = freelist;
- freelist = p;
+ p->as.free.next = slot->freelist;
+ slot->freelist = p;
p++;
}
}
@@ -1041,37 +1042,52 @@ rb_during_gc(void)
return during_gc;
}
-#define RANY(o) ((RVALUE*)(o))
+static struct heaps_slot *
+next_sweptslot(rb_objspace_t *objspace)
+{
+ struct heaps_slot *slot = objspace->heap.swept_slots;
-static VALUE
-rb_newobj_from_heap(rb_objspace_t *objspace)
+ while (slot && slot != objspace->heap.sweep_slots) {
+ if (slot->freelist) {
+ return slot;
+ }
+ slot = objspace->heap.swept_slots = objspace->heap.swept_slots->next;
+ }
+ return 0;
+}
+
+static struct heaps_slot *
+get_sweptslot(rb_thread_t *th, rb_objspace_t *objspace)
{
- VALUE obj;
+ struct heaps_slot *slot = th->heap_slot;
+
+ if (LIKELY(slot != 0) && LIKELY(slot->freelist != 0)) {
+ return slot;
+ }
+
+ while (1) {
+ slot = next_sweptslot(objspace);
+
+ if (LIKELY(slot != 0) && LIKELY(slot->freelist != 0)) {
+ th->heap_slot = slot;
+ return slot;
+ }
- if ((ruby_gc_stress && !ruby_disable_gc_stress) || !freelist) {
if (!gc_lazy_sweep(objspace)) {
during_gc = 0;
rb_memerror();
}
}
-
- obj = (VALUE)freelist;
- freelist = freelist->as.free.next;
-
- MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
- RANY(obj)->file = rb_sourcefile();
- RANY(obj)->line = rb_sourceline();
-#endif
- GC_PROF_INC_LIVE_NUM;
-
- return obj;
}
+#define RANY(o) ((RVALUE*)(o))
+
VALUE
rb_newobj(void)
{
rb_objspace_t *objspace = &rb_objspace;
+ VALUE obj;
+ struct heaps_slot *slot;
if (during_gc) {
dont_gc = 1;
@@ -1079,7 +1095,26 @@ rb_newobj(void)
rb_bug("object allocation during garbage collection phase");
}
- return rb_newobj_from_heap(objspace);
+ if (ruby_gc_stress && !ruby_disable_gc_stress) {
+ if (!garbage_collect(objspace)) {
+ during_gc = 0;
+ rb_memerror();
+ }
+ }
+
+ slot = get_sweptslot(GET_THREAD(), objspace);
+
+ obj = (VALUE)slot->freelist;
+ slot->freelist = slot->freelist->as.free.next;
+
+ MEMZERO((void*)obj, RVALUE, 1);
+#ifdef GC_DEBUG
+ RANY(obj)->file = rb_sourcefile();
+ RANY(obj)->line = rb_sourceline();
+#endif
+ GC_PROF_INC_LIVE_NUM;
+
+ return obj;
}
NODE*
@@ -1812,12 +1847,36 @@ gc_mark_children(rb_objspace_t *objspace
static int obj_free(rb_objspace_t *, VALUE);
static inline void
-add_freelist(rb_objspace_t *objspace, RVALUE *p)
+add_freelist(struct heaps_slot *slot, RVALUE *p)
{
VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
p->as.free.flags = 0;
- p->as.free.next = freelist;
- freelist = p;
+ p->as.free.next = slot->freelist;
+ slot->freelist = p;
+}
+
+static struct heaps_slot *
+slot_of_obj(rb_objspace_t *objspace, void *ptr)
+{
+ register RVALUE *p = RANY(ptr);
+ register struct sorted_heaps_slot *heap;
+ register size_t hi, lo, mid;
+
+ lo = 0;
+ hi = heaps_used;
+ while (lo < hi) {
+ mid = (lo + hi) / 2;
+ heap = &objspace->heap.sorted[mid];
+ if (heap->start <= p) {
+ if (p < heap->end)
+ return heap->slot;
+ lo = mid + 1;
+ }
+ else {
+ hi = mid;
+ }
+ }
+ rb_bug("slot_of_obj: unreachable");
}
static void
@@ -1832,7 +1891,8 @@ finalize_list(rb_objspace_t *objspace, R
}
else {
GC_PROF_DEC_LIVE_NUM;
- add_freelist(objspace, p);
+ // add_freelist(slot_of_obj(objspace, p), p);
+ RANY(p)->as.free.flags = 0;
}
}
else {
@@ -1854,11 +1914,12 @@ unlink_heap_slot(rb_objspace_t *objspace
heaps = slot->next;
if (objspace->heap.sweep_slots == slot)
objspace->heap.sweep_slots = slot->next;
+ if (objspace->heap.swept_slots == slot)
+ objspace->heap.swept_slots = slot->next;
slot->prev = NULL;
slot->next = NULL;
}
-
static void
free_unused_heaps(rb_objspace_t *objspace)
{
@@ -1899,9 +1960,10 @@ slot_sweep(rb_objspace_t *objspace, stru
{
size_t free_num = 0, final_num = 0;
RVALUE *p, *pend;
- RVALUE *free = freelist, *final = deferred_final_list;
+ RVALUE *final = deferred_final_list;
int deferred;
+ sweep_slot->freelist = 0;
p = sweep_slot->slot; pend = p + sweep_slot->limit;
while (p < pend) {
if (!(p->as.basic.flags & FL_MARK)) {
@@ -1918,7 +1980,7 @@ slot_sweep(rb_objspace_t *objspace, stru
final_num++;
}
else {
- add_freelist(objspace, p);
+ add_freelist(sweep_slot, p);
free_num++;
}
}
@@ -1940,20 +2002,25 @@ slot_sweep(rb_objspace_t *objspace, stru
pp->as.free.flags |= FL_SINGLETON; /* freeing page mark */
}
sweep_slot->limit = final_num;
- freelist = free; /* cancel this page from freelist */
+ sweep_slot->freelist = 0;
unlink_heap_slot(objspace, sweep_slot);
}
else {
objspace->heap.free_num += free_num;
}
objspace->heap.final_num += final_num;
+
+ if (final_num > 0) {
+ /* kick finalizer */
+ RUBY_VM_SET_FINALIZER_INTERRUPT(GET_THREAD());
+ }
}
static int
ready_to_gc(rb_objspace_t *objspace)
{
if (dont_gc || during_gc) {
- if (!freelist) {
+ if (!next_sweptslot(objspace)) {
if (!heaps_increment(objspace)) {
set_heaps_increment(objspace);
heaps_increment(objspace);
@@ -1967,7 +2034,7 @@ ready_to_gc(rb_objspace_t *objspace)
static void
before_gc_sweep(rb_objspace_t *objspace)
{
- freelist = 0;
+ objspace->heap.swept_slots = 0;
objspace->heap.do_heap_free = (size_t)((heaps_used * HEAP_OBJ_LIMIT) * 0.65);
objspace->heap.free_min = (size_t)((heaps_used * HEAP_OBJ_LIMIT) * 0.2);
if (objspace->heap.free_min < FREE_MIN) {
@@ -1975,7 +2042,13 @@ before_gc_sweep(rb_objspace_t *objspace)
objspace->heap.free_min = FREE_MIN;
}
objspace->heap.sweep_slots = heaps;
+ objspace->heap.swept_slots = heaps;
objspace->heap.free_num = 0;
+
+ /* sweep unlinked method entries */
+ if (GET_VM()->unlinked_method_entry_list) {
+ rb_sweep_method_entry(GET_VM());
+ }
}
static void
@@ -1995,20 +2068,9 @@ after_gc_sweep(rb_objspace_t *objspace)
}
malloc_increase = 0;
- if (deferred_final_list) {
- /* clear finalization list */
- RUBY_VM_SET_FINALIZER_INTERRUPT(GET_THREAD());
- }
- else{
free_unused_heaps(objspace);
}
- /* sweep unlinked method entries */
- if (th->vm->unlinked_method_entry_list) {
- rb_sweep_method_entry(th->vm);
- }
-}
-
static int
lazy_sweep(rb_objspace_t *objspace)
{
@@ -2019,7 +2081,7 @@ lazy_sweep(rb_objspace_t *objspace)
next = objspace->heap.sweep_slots->next;
slot_sweep(objspace, objspace->heap.sweep_slots);
objspace->heap.sweep_slots = next;
- if (freelist) {
+ if (next_sweptslot(objspace)) {
during_gc = 0;
return TRUE;
}
@@ -2077,7 +2139,7 @@ gc_lazy_sweep(rb_objspace_t *objspace)
GC_PROF_SWEEP_TIMER_START;
if(!(res = lazy_sweep(objspace))) {
after_gc_sweep(objspace);
- if(freelist) {
+ if(next_sweptslot(objspace)) {
res = TRUE;
during_gc = 0;
}
@@ -2115,7 +2177,8 @@ rb_gc_force_recycle(VALUE p)
RANY(p)->as.free.flags = 0;
}
else {
- add_freelist(objspace, (RVALUE *)p);
+ // add_freelist(slot_of_obj(objspace, (void *)p), (RVALUE *)p);
+ RANY(p)->as.free.flags = 0;
}
}
@@ -2785,20 +2848,17 @@ run_single_final(VALUE arg)
}
static void
-run_finalizer(rb_objspace_t *objspace, VALUE obj, VALUE objid, VALUE table)
+run_finalizer(rb_objspace_t *objspace, VALUE objid, VALUE table)
{
long i;
int status;
VALUE args[3];
+ VALUE objid_ary = rb_obj_freeze(rb_ary_new3(1, objid));
- args[1] = 0;
- args[2] = (VALUE)rb_safe_level();
- if (!args[1] && RARRAY_LEN(table) > 0) {
- args[1] = rb_obj_freeze(rb_ary_new3(1, objid));
- }
for (i=0; i<RARRAY_LEN(table); i++) {
VALUE final = RARRAY_PTR(table)[i];
args[0] = RARRAY_PTR(final)[1];
+ args[1] = objid_ary;
args[2] = FIX2INT(RARRAY_PTR(final)[0]);
rb_protect(run_single_final, (VALUE)args, &status);
}
@@ -2828,7 +2888,7 @@ run_final(rb_objspace_t *objspace, VALUE
key = (st_data_t)obj;
if (st_delete(finalizer_table, &key, &table)) {
- run_finalizer(objspace, obj, objid, (VALUE)table);
+ run_finalizer(objspace, objid, (VALUE)table);
}
}
@@ -2843,17 +2903,10 @@ finalize_deferred(rb_objspace_t *objspac
}
}
-static void
-gc_finalize_deferred(rb_objspace_t *objspace)
-{
- finalize_deferred(objspace);
- free_unused_heaps(objspace);
-}
-
void
rb_gc_finalize_deferred(void)
{
- gc_finalize_deferred(&rb_objspace);
+ finalize_deferred(&rb_objspace);
}
static int
@@ -2919,7 +2972,7 @@ rb_objspace_call_finalizer(rb_objspace_t
st_foreach(finalizer_table, force_chain_object, (st_data_t)&list);
while (list) {
struct force_finalize_list *curr = list;
- run_finalizer(objspace, curr->obj, rb_obj_id(curr->obj), curr->table);
+ run_finalizer(objspace, rb_obj_id(curr->obj), curr->table);
st_delete(finalizer_table, (st_data_t*)&curr->obj, 0);
list = curr->next;
xfree(curr);
@@ -2973,7 +3026,8 @@ rb_gc(void)
{
rb_objspace_t *objspace = &rb_objspace;
garbage_collect(objspace);
- gc_finalize_deferred(objspace);
+ finalize_deferred(objspace);
+ free_unused_heaps(objspace);
}
/*
Index: vm.c
===================================================================
--- vm.c (revision 29675)
+++ vm.c (working copy)
@@ -1680,6 +1680,8 @@ rb_thread_mark(void *ptr)
}
mark_event_hooks(th->event_hooks);
+
+ th->heap_slot = 0;
}
RUBY_MARK_LEAVE("thread");