From: Yusuke ENDOH Date: 2008-11-18T00:44:53+09:00 Subject: [ruby-dev:37106] [Feature:1.9] speed up continuation in 1.9 遠藤です。 1.9 の継続は 1.8 に比べて極端に遅いようです。 $ time ruby18 -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000' real 0m1.060s user 0m1.050s sys 0m0.010s $ time ruby19 -rcontinuation -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000' real 1m57.022s user 1m56.780s sys 0m0.180s capture や call の際、VM のスタックを常に丸ごとコピーしているのが 原因で、必要なところ (先頭の sp 部分と終端の cfp 部分) だけコピー するようにしたら、1.8 並に速くなりました。 $ time ./ruby.fast-cont -rcontinuation -e 'i = 0; callcc {|c| $c = c }; i += 1; $c.call if i < 1000000' real 0m0.660s user 0m0.660s sys 0m0.000s 私の環境で test-all が通ることは確認しています。 とくに異論がなければコミットしようと思います。 Index: cont.c =================================================================== --- cont.c (revision 20241) +++ cont.c (working copy) @@ -14,6 +14,8 @@ #include "gc.h" #include "eval_intern.h" +#define CAPTURE_JUST_VALID_VM_STACK 1 + enum context_type { CONTINUATION_CONTEXT = 0, FIBER_CONTEXT = 1, @@ -25,6 +27,10 @@ VALUE self; VALUE value; VALUE *vm_stack; +#ifdef CAPTURE_JUST_VALID_VM_STACK + int vm_stack_slen; /* length of stack (head of th->stack) */ + int vm_stack_clen; /* length of control frames (tail of th->stack) */ +#endif VALUE *machine_stack; VALUE *machine_stack_src; #ifdef __ia64 @@ -75,8 +81,13 @@ rb_thread_mark(&cont->saved_thread); if (cont->vm_stack) { +#ifdef CAPTURE_JUST_VALID_VM_STACK rb_gc_mark_locations(cont->vm_stack, - cont->vm_stack + cont->saved_thread.stack_size); + cont->vm_stack + cont->vm_stack_slen + cont->vm_stack_clen); +#else + rb_gc_mark_locations(cont->vm_stack, + cont->vm_stack + cont->saved_thread.stack_size); +#endif } if (cont->machine_stack) { @@ -247,8 +258,16 @@ contval = cont->self; sth = &cont->saved_thread; +#ifdef CAPTURE_JUST_VALID_VM_STACK + cont->vm_stack_slen = th->cfp->sp + th->mark_stack_len - th->stack; + cont->vm_stack_clen = th->stack + th->stack_size - (VALUE*)th->cfp; + cont->vm_stack = ALLOC_N(VALUE, 
cont->vm_stack_slen + cont->vm_stack_clen); + MEMCPY(cont->vm_stack, th->stack, VALUE, cont->vm_stack_slen); + MEMCPY(cont->vm_stack + cont->vm_stack_slen, (VALUE*)th->cfp, VALUE, cont->vm_stack_clen); +#else cont->vm_stack = ALLOC_N(VALUE, th->stack_size); MEMCPY(cont->vm_stack, th->stack, VALUE, th->stack_size); +#endif sth->stack = 0; cont_save_machine_stack(th, cont); @@ -288,7 +307,13 @@ th->stack_size = fcont->saved_thread.stack_size; th->stack = fcont->saved_thread.stack; } +#ifdef CAPTURE_JUST_VALID_VM_STACK + MEMCPY(th->stack, cont->vm_stack, VALUE, cont->vm_stack_slen); + MEMCPY(th->stack + sth->stack_size - cont->vm_stack_clen, + cont->vm_stack + cont->vm_stack_slen, VALUE, cont->vm_stack_clen); +#else MEMCPY(th->stack, cont->vm_stack, VALUE, sth->stack_size); +#endif } else { /* fiber */ -- Yusuke ENDOH