[#15707] Schedule for the 1.8.7 release — "Akinori MUSHA" <knu@...> (21 messages, 2008/03/01)

[#15740] Copy-on-write friendly garbage collector — Hongli Lai <hongli@...99.net> (31 messages, 2008/03/03)
    [#15742] Re: Copy-on-write friendly garbage collector — Yukihiro Matsumoto <matz@...> (2008/03/03)
    [#15829] Re: Copy-on-write friendly garbage collector — Daniel DeLorme <dan-ml@...42.com> (2008/03/08)

[#15756] embedding Ruby 1.9.0 inside pthread — "Suraj Kurapati" <sunaku@...> (18 messages, 2008/03/03)
    [#15759] Re: embedding Ruby 1.9.0 inside pthread — Nobuyoshi Nakada <nobu@...> (2008/03/04)
    [#15760] Re: embedding Ruby 1.9.0 inside pthread — Yukihiro Matsumoto <matz@...> (2008/03/04)
    [#15762] Re: embedding Ruby 1.9.0 inside pthread — "Suraj N. Kurapati" <sunaku@...> (2008/03/04)

[#15783] Adding startup and shutdown to Test::Unit — Daniel Berger <Daniel.Berger@...> (15 messages, 2008/03/04)

[#15835] TimeoutError in core, timeouts for ConditionVariable#wait — MenTaLguY <mental@...> (10 messages, 2008/03/09)

[#15990] Recent changes in Range#step behavior — "Vladimir Sizikov" <vsizikov@...> (35 messages, 2008/03/23)
    [#15991] Re: Recent changes in Range#step behavior — Dave Thomas <dave@...> (2008/03/23)
    [#15993] Re: Recent changes in Range#step behavior — "Vladimir Sizikov" <vsizikov@...> (2008/03/23)
    [#15997] Re: Recent changes in Range#step behavior — Dave Thomas <dave@...> (2008/03/23)
    [#16024] Re: Recent changes in Range#step behavior — "Vladimir Sizikov" <vsizikov@...> (2008/03/26)
    [#16025] Re: Recent changes in Range#step behavior — Yukihiro Matsumoto <matz@...> (2008/03/26)
    [#16026] Re: Recent changes in Range#step behavior — Dave Thomas <dave@...> (2008/03/26)
    [#16027] Re: Recent changes in Range#step behavior — Yukihiro Matsumoto <matz@...> (2008/03/26)
    [#16029] Re: Recent changes in Range#step behavior — Dave Thomas <dave@...> (2008/03/26)
    [#16030] Re: Recent changes in Range#step behavior — Yukihiro Matsumoto <matz@...> (2008/03/26)
    [#16031] Re: Recent changes in Range#step behavior — Dave Thomas <dave@...> (2008/03/26)
    [#16032] Re: Recent changes in Range#step behavior — "Vladimir Sizikov" <vsizikov@...> (2008/03/26)
    [#16033] Re: Recent changes in Range#step behavior — Dave Thomas <dave@...> (2008/03/26)
    [#16041] Re: Recent changes in Range#step behavior — David Flanagan <david@...> (2008/03/26)

Subject: Re: Copy-on-write friendly garbage collector
From: Daniel DeLorme <dan-ml@...42.com>
Date: 2008-03-08 10:01:51 UTC
List: ruby-core #15831
Third email in a row... I hope that's not considered spamming ~_~

Since the patch was based on r15675, I tried to merge it plus my
changes into the trunk (r15730) but ran into some conflicts. While
resolving them, I realized that r15683 and r15684 (the apparent source
of the conflicts) sort the heap list by memory address. That sorting is
only needed for a binary search, and with so few heaps a linear search
is more efficient anyway, so I removed it. New heaps are now appended
to the end of the list, and the list is searched from last to first,
so the newest (and therefore largest) heaps are checked first.
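
For reviewers who want the gist before wading through the diff, here is a
stripped-down sketch of the approach (an illustration only, not the code in
the patch): mark bits live in a per-heap bitfield allocated outside the
object slots, and the heap that owns an object is found with a cached,
last-to-first linear scan. The real gc.c changes also handle alignment,
allocation failure, finalizers, and so on.

    /* sketch.c: simplified illustration of the external mark bitfield and
     * the last-to-first heap lookup.  Not the actual gc.c code. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct rvalue { unsigned long flags; /* payload elided */ } RVALUE;

    struct heaps_slot {
        RVALUE *slot;       /* first object slot in this heap */
        RVALUE *slotlimit;  /* one past the last slot */
        int    *marks;      /* one mark bit per slot, stored outside the heap */
    };

    static struct heaps_slot *heaps;      /* newest heap is at heaps_used-1 */
    static int heaps_used;
    static struct heaps_slot *last_heap;  /* lookups tend to repeat a heap */

    static struct heaps_slot *
    find_heap_slot_for_object(RVALUE *object)
    {
        int i;
        if (last_heap && last_heap->slot <= object
            && object < last_heap->slotlimit)
            return last_heap;
        /* new heaps are appended, so scan from the last (largest) heap down */
        for (i = heaps_used - 1; i >= 0; i--) {
            if (heaps[i].slot <= object && object < heaps[i].slotlimit)
                return last_heap = &heaps[i];
        }
        return NULL;
    }

    static void
    mark_object(RVALUE *object)
    {
        struct heaps_slot *hs = find_heap_slot_for_object(object);
        if (hs) {
            unsigned int idx = (unsigned int)(object - hs->slot);
            /* only the marks array is written; the page holding the object
             * stays clean, so pages shared with a fork()ed child stay shared */
            hs->marks[idx / (sizeof(int) * 8)] |= 1 << (idx % (sizeof(int) * 8));
        }
    }

    int
    main(void)
    {
        struct heaps_slot h;
        h.slot = calloc(100, sizeof(RVALUE));
        h.slotlimit = h.slot + 100;
        h.marks = calloc((100 + sizeof(int) * 8 - 1) / (sizeof(int) * 8),
                         sizeof(int));
        heaps = &h;
        heaps_used = 1;

        mark_object(h.slot + 42);
        printf("slot 42 marked: %d\n",
               !!(h.marks[42 / (sizeof(int) * 8)]
                  & (1 << (42 % (sizeof(int) * 8)))));
        return 0;
    }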

So now, for the following test case, which generates 9 heaps:
   GC.disable
   x=(1..1000000).map{"x"*10}
   GC.enable
   GC.start
the COW-friendly version is only 1-2% slower than r15730.

I hope this makes it into the trunk, but please do review the code.

--
Daniel

Attachments (1)

cow-friendly.patch (15.2 KB, text/x-diff)
Index: debug.c
===================================================================
--- debug.c	(revision 15730)
+++ debug.c	(working copy)
@@ -29,8 +29,8 @@
         RUBY_ENC_CODERANGE_7BIT    = ENC_CODERANGE_7BIT,
         RUBY_ENC_CODERANGE_VALID   = ENC_CODERANGE_VALID,
         RUBY_ENC_CODERANGE_BROKEN  = ENC_CODERANGE_BROKEN, 
-        RUBY_FL_MARK        = FL_MARK,
-        RUBY_FL_RESERVED    = FL_RESERVED,
+        RUBY_FL_RESERVED0   = FL_RESERVED0,
+        RUBY_FL_RESERVED1   = FL_RESERVED1,
         RUBY_FL_FINALIZE    = FL_FINALIZE,
         RUBY_FL_TAINT       = FL_TAINT,
         RUBY_FL_EXIVAR      = FL_EXIVAR,
Index: include/ruby/ruby.h
===================================================================
--- include/ruby/ruby.h	(revision 15730)
+++ include/ruby/ruby.h	(working copy)
@@ -621,8 +621,8 @@
 #define RVALUES(obj) (R_CAST(RValues)(obj))
 
 #define FL_SINGLETON FL_USER0
-#define FL_MARK      (((VALUE)1)<<5)
-#define FL_RESERVED  (((VALUE)1)<<6) /* will be used in the future GC */
+#define FL_RESERVED0 (((VALUE)1)<<5) /* will be used in the future GC */
+#define FL_RESERVED1 (((VALUE)1)<<6) /* will be used in the future GC */
 #define FL_FINALIZE  (((VALUE)1)<<7)
 #define FL_TAINT     (((VALUE)1)<<8)
 #define FL_EXIVAR    (((VALUE)1)<<9)
@@ -716,6 +716,7 @@
 void rb_register_mark_object(VALUE);
 void rb_gc_register_address(VALUE*);
 void rb_gc_unregister_address(VALUE*);
+int rb_gc_is_thread_marked(VALUE);
 
 ID rb_intern(const char*);
 ID rb_intern2(const char*, long);
Index: gc.c
===================================================================
--- gc.c	(revision 15730)
+++ gc.c	(working copy)
@@ -22,8 +22,14 @@
 #include "gc.h"
 #include <stdio.h>
 #include <setjmp.h>
+#include <math.h>
 #include <sys/types.h>
 
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
 #endif
@@ -145,6 +151,9 @@
     void *membase;
     RVALUE *slot;
     int limit;
+    RVALUE *slotlimit;
+    int *marks;
+    int marks_size;
 } *heaps;
 static int heaps_length = 0;
 static int heaps_used   = 0;
@@ -322,7 +331,29 @@
 	RUBY_CRITICAL(free(x));
 }
 
+static int debugging = 0;
 
+#define OPTION_ENABLED(name) (getenv((name)) && *getenv((name)) && *getenv((name)) != '0')
+
+static void *
+alloc_ruby_heap(size_t size)
+{
+    return malloc(size);
+}
+
+static void
+free_ruby_heap(void *heap)
+{
+    free(heap);
+}
+
+static void
+init_debugging()
+{
+    debugging = OPTION_ENABLED("RUBY_GC_DEBUG");
+}
+
+
 /*
  *  call-seq:
  *     GC.enable    => true or false
@@ -413,11 +444,107 @@
     }
 }
 
+static struct heaps_slot *last_heap = NULL;
+
+static inline struct heaps_slot *
+find_heap_slot_for_object(RVALUE *object)
+{
+    struct heaps_slot *heap;
+    register int i;
+
+    /* Look in the cache first. */
+    if (last_heap != NULL && object >= last_heap->slot
+	&& object < last_heap->slotlimit) {
+	return last_heap;
+    }
+    /* find heap_slot for object using linear search
+     * (faster than bsearch because there are only a few heaps)
+     */
+    i = heaps_used;
+    while (i) {
+	i--;
+	heap = &heaps[i];
+	if (heap->slot <= object) {
+	    if (object < heap->slotlimit) {
+		/* Cache this result. According to empirical evidence, the chance is
+		 * high that the next lookup will be for the same heap slot.
+		 */
+		last_heap = heap;
+		return heap;
+	    }
+	}
+    }
+    return NULL;
+}
+
+static inline void
+find_position_in_bitfield(struct heaps_slot *hs, RVALUE *object,
+                          unsigned int *bitfield_index, unsigned int *bitfield_offset)
+{
+    unsigned int index;
+
+    index = object - hs->slot;
+    *bitfield_index = index / (sizeof(int) * 8);
+    *bitfield_offset = index % (sizeof(int) * 8);
+}
+
+
 static void
+rb_mark_table_add(RVALUE *object)
+{
+    struct heaps_slot *hs;
+    unsigned int bitfield_index, bitfield_offset;
+
+    hs = find_heap_slot_for_object(object);
+    if (hs != NULL) {
+	find_position_in_bitfield(hs, object, &bitfield_index, &bitfield_offset);
+	hs->marks[bitfield_index] |= (1 << bitfield_offset);
+    }
+}
+
+static inline int
+rb_mark_table_heap_contains(struct heaps_slot *hs, RVALUE *object)
+{
+    unsigned int bitfield_index, bitfield_offset;
+
+    find_position_in_bitfield(hs, object, &bitfield_index, &bitfield_offset);
+    return hs->marks[bitfield_index] & (1 << bitfield_offset);
+}
+
+static inline int
+rb_mark_table_contains(RVALUE *object)
+{
+    struct heaps_slot *hs;
+
+    hs = find_heap_slot_for_object(object);
+    if (hs != NULL) {
+	return rb_mark_table_heap_contains(hs, object);
+    }
+}
+
+static inline void
+rb_mark_table_heap_remove(struct heaps_slot *hs, RVALUE *object)
+{
+    unsigned int bitfield_index, bitfield_offset;
+    find_position_in_bitfield(hs, object, &bitfield_index, &bitfield_offset);
+    hs->marks[bitfield_index] &= ~(1 << bitfield_offset);
+}
+
+static void
+rb_mark_table_remove(RVALUE *object)
+{
+    struct heaps_slot *hs;
+
+    hs = find_heap_slot_for_object(object);
+    if (hs != NULL) {
+	rb_mark_table_heap_remove(hs, object);
+    }
+}
+
+static void
 add_heap(void)
 {
-    RVALUE *p, *pend, *membase;
-    long hi, lo, mid;
+    RVALUE *p, *pend;
 
     if (heaps_used == heaps_length) {
 	/* Realloc heaps */
@@ -438,45 +565,26 @@
     }
 
     for (;;) {
-	RUBY_CRITICAL(p = (RVALUE*)malloc(sizeof(RVALUE)*(heap_slots+1)));
+	RUBY_CRITICAL(p = (RVALUE*)alloc_ruby_heap(sizeof(RVALUE)*(heap_slots+1)));
 	if (p == 0) {
 	    if (heap_slots == HEAP_MIN_SLOTS) {
 		rb_memerror();
 	    }
 	    heap_slots = HEAP_MIN_SLOTS;
+	    continue;
 	}
-	else {
-	    break;
-	}
+	heaps[heaps_used].membase = p;
+	if ((VALUE)p % sizeof(RVALUE) == 0)
+	    heap_slots += 1;
+	else
+	    p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
+	heaps[heaps_used].slot = p;
+	heaps[heaps_used].limit = heap_slots;
+	heaps[heaps_used].slotlimit = p + heap_slots;
+        heaps[heaps_used].marks_size = (int) (ceil(heap_slots / (sizeof(int) * 8.0)));
+        heaps[heaps_used].marks = (int *) calloc(heaps[heaps_used].marks_size, sizeof(int));
+	break;
     }
-
-    lo = 0;
-    hi = heaps_used;
-    while (lo < hi) {
-	mid = (lo + hi) / 2;
-	membase = heaps[mid].membase;
-	if (membase < p) {
-	    lo = mid + 1;
-	}
-	else if (membase > p) {
-	    hi = mid;
-	}
-	else {
-	    rb_bug("same heap slot is allocated: %p at %ld", p, mid);
-	}
-    }
-
-    membase = p;
-    if ((VALUE)p % sizeof(RVALUE) == 0)
-	heap_slots += 1;
-    else
-	p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
-    if (hi < heaps_used) {
-	MEMMOVE(&heaps[hi+1], &heaps[hi], struct heaps_slot, heaps_used - hi);
-    }
-    heaps[hi].membase = membase;
-    heaps[hi].slot = p;
-    heaps[hi].limit = heap_slots;
     pend = p + heap_slots;
     if (lomem == 0 || lomem > p) lomem = p;
     if (himem < pend) himem = pend;
@@ -508,6 +616,7 @@
     freelist = freelist->as.free.next;
 
     MEMZERO((void*)obj, RVALUE, 1);
+    RANY(obj)->as.free.flags = 0;
 #ifdef GC_DEBUG
     RANY(obj)->file = rb_sourcefile();
     RANY(obj)->line = rb_sourceline();
@@ -710,14 +819,15 @@
 gc_mark_all(void)
 {
     RVALUE *p, *pend;
+    struct heaps_slot *heap;
     int i;
 
     init_mark_stack();
     for (i = 0; i < heaps_used; i++) {
-	p = heaps[i].slot; pend = p + heaps[i].limit;
+	heap = &heaps[i];
+	p = heap->slot; pend = heap->slotlimit;
 	while (p < pend) {
-	    if ((p->as.basic.flags & FL_MARK) &&
-		(p->as.basic.flags != FL_MARK)) {
+	    if (rb_mark_table_heap_contains(heap, p) && (p->as.basic.flags != 0)) {
 		gc_mark_children((VALUE)p, 0);
 	    }
 	    p++;
@@ -751,21 +861,8 @@
     if (p < lomem || p > himem) return Qfalse;
     if ((VALUE)p % sizeof(RVALUE) != 0) return Qfalse;
 
-    /* check if p looks like a pointer using bsearch*/
-    lo = 0;
-    hi = heaps_used;
-    while (lo < hi) {
-	mid = (lo + hi) / 2;
-	heap = &heaps[mid];
-	if (heap->slot <= p) {
-	    if (p < heap->slot + heap->limit)
-		return Qtrue;
-	    lo = mid + 1;
-	}
-	else {
-	    hi = mid;
-	}
-    }
+    if (find_heap_slot_for_object(p))
+	return Qtrue;
     return Qfalse;
 }
 
@@ -871,8 +968,8 @@
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    if (rb_mark_table_contains(obj)) return;  /* already marked */
+    rb_mark_table_add(obj);
 
     if (lev > GC_LEVEL_MAX || (lev == 0 && ruby_stack_check())) {
 	if (!mark_stack_overflow) {
@@ -906,8 +1003,8 @@
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    if (rb_mark_table_contains(obj)) return;  /* already marked */
+    rb_mark_table_add(obj);
 
   marking:
     if (FL_TEST(obj, FL_EXIVAR)) {
@@ -1161,10 +1258,15 @@
     while (p) {
 	RVALUE *tmp = p->as.free.next;
 	run_final((VALUE)p);
-	if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
+	/* Don't free objects that are singletons, or objects that are already freed.
+	 * The latter is to prevent the unnecessary marking of memory pages as dirty,
+	 * which can destroy copy-on-write semantics.
+	 */
+	if (!FL_TEST(p, FL_SINGLETON) && p->as.free.flags != 0) {
             VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
 	    p->as.free.flags = 0;
 	    p->as.free.next = freelist;
+	    rb_mark_table_remove(p);
 	    freelist = p;
 	}
 	p = tmp;
@@ -1178,7 +1280,8 @@
 
     for (i = j = 1; j < heaps_used; i++) {
 	if (heaps[i].limit == 0) {
-	    free(heaps[i].membase);
+	    free_ruby_heap(heaps[i].membase);
+	    free(heaps[i].marks);
 	    heaps_used--;
 	}
 	else {
@@ -1219,32 +1322,38 @@
 	int n = 0;
 	RVALUE *free = freelist;
 	RVALUE *final = final_list;
+	struct heaps_slot *heap = &heaps[i];
 
-	p = heaps[i].slot; pend = p + heaps[i].limit;
+	p = heap->slot; pend = heap->slotlimit;
 	while (p < pend) {
-	    if (!(p->as.basic.flags & FL_MARK)) {
+	    if (!rb_mark_table_heap_contains(heap, p)) {
 		if (p->as.basic.flags) {
 		    obj_free((VALUE)p);
 		}
 		if (need_call_final && FL_TEST(p, FL_FINALIZE)) {
-		    p->as.free.flags = FL_MARK; /* remain marked */
+		    p->as.free.flags = FL_FINALIZE;
 		    p->as.free.next = final_list;
 		    final_list = p;
 		}
 		else {
                     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
-		    p->as.free.flags = 0;
-		    p->as.free.next = freelist;
+		    /* Do not touch the fields if they don't have to be modified.
+		     * This is in order to preserve copy-on-write semantics.
+		     */
+		    if (p->as.free.flags != 0)
+			p->as.free.flags = 0;
+		    if (p->as.free.next != freelist)
+			p->as.free.next = freelist;
 		    freelist = p;
 		}
 		n++;
 	    }
-	    else if (RBASIC(p)->flags == FL_MARK) {
+	    else if (RBASIC(p)->flags == FL_FINALIZE) {
 		/* objects to be finalized */
-		/* do nothing remain marked */
+		/* do nothing here */
 	    }
 	    else {
-		RBASIC(p)->flags &= ~FL_MARK;
+		rb_mark_table_heap_remove(heap, p);
 		live++;
 	    }
 	    p++;
@@ -1286,6 +1395,7 @@
     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
     RANY(p)->as.free.flags = 0;
     RANY(p)->as.free.next = freelist;
+    rb_mark_table_remove((RVALUE *) p);
     freelist = RANY(p);
 }
 
@@ -1476,6 +1586,7 @@
 
     FLUSH_REGISTER_WINDOWS;
     /* This assumes that all registers are saved into the jmp_buf (and stack) */
+    memset(save_regs_gc_mark, 0, sizeof(save_regs_gc_mark));
     setjmp(save_regs_gc_mark);
     mark_locations_array((VALUE*)save_regs_gc_mark,
 			 sizeof(save_regs_gc_mark) / sizeof(VALUE));
@@ -1515,6 +1626,7 @@
 
     SET_STACK_END;
 
+    last_heap = NULL;
     init_mark_stack();
 
     th->vm->self ? rb_gc_mark(th->vm->self) : rb_vm_mark(th->vm);
@@ -1616,6 +1728,18 @@
     rb_gc_stack_maxsize = size;
 }
 
+int
+rb_gc_is_thread_marked(the_thread)
+    VALUE the_thread;
+{
+    if (FL_ABLE(the_thread)) {
+	return rb_mark_table_contains((RVALUE *) the_thread);
+    }
+    else {
+	return 0;
+    }
+}
+
 void
 Init_stack(VALUE *addr)
 {
@@ -1837,12 +1961,9 @@
     VALUE of;
 
     rb_secure(4);
-    if (argc == 0) {
+    if (rb_scan_args(argc, argv, "01", &of) == 0) {
 	of = 0;
     }
-    else {
-	rb_scan_args(argc, argv, "01", &of);
-    }
     RETURN_ENUMERATOR(os, 1, &of);
     return os_obj_of(of);
 }
@@ -2025,6 +2146,7 @@
 rb_gc_call_finalizer_at_exit(void)
 {
     RVALUE *p, *pend;
+    struct heaps_slot *heap;
     int i;
 
     /* finalizers are part of garbage collection */
@@ -2048,12 +2170,14 @@
     }
     /* run data object's finalizers */
     for (i = 0; i < heaps_used; i++) {
-	p = heaps[i].slot; pend = p + heaps[i].limit;
+	heap = &heaps[i];
+	p = heap->slot; pend = heap->slotlimit;
 	while (p < pend) {
 	    if (BUILTIN_TYPE(p) == T_DATA &&
 		DATA_PTR(p) && RANY(p)->as.data.dfree &&
 		RANY(p)->as.basic.klass != rb_cThread) {
 		p->as.free.flags = 0;
+		rb_mark_table_heap_remove(heap, p);
 		if ((long)RANY(p)->as.data.dfree == -1) {
 		    RUBY_CRITICAL(free(DATA_PTR(p)));
 		}
@@ -2065,6 +2189,7 @@
 	    else if (BUILTIN_TYPE(p) == T_FILE) {
 		if (rb_io_fptr_finalize(RANY(p)->as.file.fptr)) {
 		    p->as.free.flags = 0;
+		    rb_mark_table_heap_remove(heap, p);
                     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
 		}
 	    }
@@ -2285,6 +2410,61 @@
     return hash;
 }
 
+static VALUE
+os_statistics()
+{
+    int i;
+    int n = 0;
+    unsigned int objects = 0;
+    unsigned int total_heap_size = 0;
+    unsigned int ast_nodes = 0;
+    char message[1024];
+
+    for (i = 0; i < heaps_used; i++) {
+	RVALUE *p, *pend;
+
+	p = heaps[i].slot;
+	pend = p + heaps[i].limit;
+	for (;p < pend; p++) {
+	    if (p->as.basic.flags) {
+		int isAST = 0;
+		switch (TYPE(p)) {
+		  case T_ICLASS:
+		  case T_NODE:
+		    isAST = 1;
+		    break;
+		  case T_CLASS:
+		    if (FL_TEST(p, FL_SINGLETON)) {
+		        isAST = 1;
+		        break;
+		    }
+		  default:
+		    break;
+		}
+		objects++;
+		if (isAST) {
+		   ast_nodes++;
+		}
+	    }
+	}
+	total_heap_size += (void*)pend - heaps[i].membase;
+    }
+
+    snprintf(message, sizeof(message),
+        "Number of objects: %d (%d AST nodes, %.2f%%)\n"
+        "Heap slot size: %d\n"
+        "Number of heaps: %d\n"
+        "Total size of objects: %.2f KB\n"
+        "Total size of heaps: %.2f KB\n",
+        objects, ast_nodes, ast_nodes * 100 / (double) objects,
+        sizeof(RVALUE),
+        heaps_used,
+        objects * sizeof(RVALUE) / 1024.0,
+        total_heap_size / 1024.0
+    );
+    return rb_str_new2(message);
+}
+
 /*
  *  The <code>GC</code> module provides an interface to Ruby's mark and
  *  sweep garbage collection mechanism. Some of the underlying methods
@@ -2317,6 +2497,8 @@
 
     rb_define_module_function(rb_mObSpace, "_id2ref", id2ref, 1);
 
+    rb_define_module_function(rb_mObSpace, "statistics", os_statistics, 0);
+
     rb_gc_register_address(&rb_mObSpace);
     rb_global_variable(&finalizers);
     rb_gc_unregister_address(&rb_mObSpace);
@@ -2332,4 +2514,6 @@
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
 
     rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1);
+
+    init_debugging();
 }
