From: ngotogenome@... Date: 2015-06-18T05:15:44+00:00 Subject: [ruby-dev:49105] [Ruby trunk - Bug #11265] deadlock on Solaris 10 since r50900 Issue #11265 has been updated by Naohisa Goto. IO.popen内で、retry_fork_async_signal_safeから呼んでいるvforkから帰ってこないようです。 なお、スタックトレースを取るためにデバッグ付き最適化無しでコンパイルしたら、再現確率が20-30回に1回と大幅に下がりました。タイミング依存が激しいようです。 IO.popen呼び出し側。vfork内で止まっているようです。 ~~~ $ /opt/solarisstudio12.3/bin/dbx - 1582 For information about new features see `help changes' To remove this message, put `dbxenv suppress_startup_message 7.9' in your .dbxrc Reading ruby Reading ld.so.1 Reading libpthread.so.1 Reading librt.so.1 Reading libgmp.so.10.1.3 Reading libsocket.so.1 Reading libdl.so.1 Reading libcrypt_d.so.1 Reading libm.so.2 Reading libc.so.1 Reading libaio.so.1 Reading libmd.so.1 Reading libnsl.so.1 Reading libgen.so.1 Reading libc_psr.so.1 Reading encdb.so Reading libmp.so.2 Reading libscf.so.1 Reading libdoor.so.1 Reading libuutil.so.1 Reading transdb.so Reading thread.so Attached to process 1582 with 3 LWPs t@1 (l@1) stopped in _vfork at 0x7fffffff7e8dd440 0x7fffffff7e8dd440: _vfork+0x0024: bcc,a,pt %icc,_vfork+0x4c ! 0x7fffffff7e8dd468 Current function is retry_fork_async_signal_safe 3529 pid = vfork(); (dbx) lwps >l@1 running in _vfork() l@2 LWP suspended in __pollsys() l@5 LWP suspended in elf_find_sym() (dbx) where current thread: t@1 [1] _vfork(0x630, 0x0, 0xffff, 0x100577aa8, 0x50, 0x15), at 0x7fffffff7e8dd440 =>[2] retry_fork_async_signal_safe(status = 0xffffffff7fffc594, ep = 0xffffffff7fffc3fc, chfunc = 0x1001518c0 = &`ruby`io.c`popen_exec(void *pp, char *errmsg, size_t errmsg_len), charg = 0xffffffff7fffc518, errmsg = 0xffffffff7fffc544 "", errmsg_buflen = 80U), line 3529 in "process.c" [3] rb_fork_async_signal_safe(status = 0xffffffff7fffc594, chfunc = 0x1001518c0 = &`ruby`io.c`popen_exec(void *pp, char *errmsg, size_t errmsg_len), charg = 0xffffffff7fffc518, fds = 4301789120U, errmsg = 0xffffffff7fffc544 "", errmsg_buflen = 80U), line 3571 in "process.c" [4] pipe_open(execarg_obj = 4301789560U, modestr = 0x10044db18 "r+", fmode = 7, convconfig = 0xffffffff7fffc6e8), line 6006 in "io.c" [5] rb_io_s_popen(argc = 2, argv = 0x10057ca28, klass = 4302377040U), line 6299 in "io.c" [6] call_cfunc_m1(func = 0x1001523c0 = &`ruby`io.c`rb_io_s_popen(int argc, VALUE *argv, VALUE klass), recv = 4302377040U, argc = 2, argv = 0x10057ca28), line 1421 in "vm_insnhelper.c" [7] vm_call_cfunc_with_frame(th = 0x100578240, reg_cfp = 0x10067c208, ci = 0x100868f88), line 1595 in "vm_insnhelper.c" [8] vm_call_cfunc(th = 0x100578240, reg_cfp = 0x10067c208, ci = 0x100868f88), line 1690 in "vm_insnhelper.c" [9] vm_call_method(th = 0x100578240, cfp = 0x10067c208, ci = 0x100868f88), line 1921 in "vm_insnhelper.c" [10] vm_call_general(th = 0x100578240, reg_cfp = 0x10067c208, ci = 0x100868f88), line 2085 in "vm_insnhelper.c" [11] vm_exec_core(th = 0x100578240, initial = 0), line 977 in "insns.def" [12] vm_exec(th = 0x100578240), line 1486 in "vm.c" [13] invoke_block_from_c(th = 0x100578240, block = 0x10067c348, self = 4305731800U, argc = 1, argv = 0xffffffff7fffd6d8, blockptr = (nil), cref = (nil), defined_class = 4305737000U, splattable = 1), line 857 in "vm.c" [14] vm_yield(th = 0x100578240, argc = 1, argv = 0xffffffff7fffd6d8), line 898 in "vm.c" [15] rb_yield_0(argc = 1, argv = 0xffffffff7fffd6d8), line 994 in "vm_eval.c" [16] rb_yield(val = 1U), line 1004 in "vm_eval.c" [17] int_dotimes(num = 3U), line 3951 in "numeric.c" [18] call_cfunc_0(func = 0x1001a1440 = &`ruby`numeric.c`int_dotimes(VALUE num), recv = 3U, argc = 0, argv = 0x10057c9a8), line 1427 in "vm_insnhelper.c" [19] vm_call_cfunc_with_frame(th = 0x100578240, reg_cfp = 0x10067c328, ci = 0x10089c378), line 1595 in "vm_insnhelper.c" [20] vm_call_cfunc(th = 0x100578240, reg_cfp = 0x10067c328, ci = 0x10089c378), line 1690 in "vm_insnhelper.c" [21] vm_call_method(th = 0x100578240, cfp = 0x10067c328, ci = 0x10089c378), line 1921 in "vm_insnhelper.c" [22] vm_call_general(th = 0x100578240, reg_cfp = 0x10067c328, ci = 0x10089c378), line 2085 in "vm_insnhelper.c" [23] vm_exec_core(th = 0x100578240, initial = 0), line 947 in "insns.def" [24] vm_exec(th = 0x100578240), line 1486 in "vm.c" [25] invoke_block_from_c(th = 0x100578240, block = 0x10067c540, self = 4305731800U, argc = 1, argv = 0xffffffff7fffe718, blockptr = (nil), cref = (nil), defined_class = 4305636240U, splattable = 1), line 857 in "vm.c" [26] vm_yield(th = 0x100578240, argc = 1, argv = 0xffffffff7fffe718), line 898 in "vm.c" [27] rb_yield_0(argc = 1, argv = 0xffffffff7fffe718), line 994 in "vm_eval.c" [28] rb_yield(val = 4305221440U), line 1004 in "vm_eval.c" [29] rb_ary_each(array = 4305221080U), line 1821 in "array.c" [30] call_cfunc_0(func = 0x100042900 = &rb_ary_each(VALUE array), recv = 4305221080U, argc = 0, argv = 0x10057c858), line 1427 in "vm_insnhelper.c" [31] vm_call_cfunc_with_frame(th = 0x100578240, reg_cfp = 0x10067c520, ci = 0x10092d448), line 1595 in "vm_insnhelper.c" [32] vm_call_cfunc(th = 0x100578240, reg_cfp = 0x10067c520, ci = 0x10092d448), line 1690 in "vm_insnhelper.c" [33] vm_call_method(th = 0x100578240, cfp = 0x10067c520, ci = 0x10092d448), line 1921 in "vm_insnhelper.c" [34] vm_call_general(th = 0x100578240, reg_cfp = 0x10067c520, ci = 0x10092d448), line 2085 in "vm_insnhelper.c" [35] vm_exec_core(th = 0x100578240, initial = 0), line 947 in "insns.def" [36] vm_exec(th = 0x100578240), line 1486 in "vm.c" [37] rb_iseq_eval_main(iseqval = 4302120600U), line 1733 in "vm.c" [38] ruby_exec_internal(n = 0x1006d2698), line 257 in "eval.c" [39] ruby_exec_node(n = 0x1006d2698), line 322 in "eval.c" [40] ruby_run_node(n = 0x1006d2698), line 314 in "eval.c" [41] main(argc = 7, argv = 0xffffffff7ffff848), line 36 in "main.c" (dbx) list 3529 pid = vfork(); 3530 else 3531 pid = fork(); 3532 #else 3533 pid = fork(); 3534 #endif 3535 if (pid == 0) {/* fork succeed, child process */ 3536 int ret; 3537 close(ep[0]); 3538 ret = disable_child_handler_fork_child(&old, errmsg, errmsg_buflen); /* async-signal-safe */ (dbx) exit detaching from process 1582 ~~~ IO.popenから起動した新プロセス側。こちらは fcntl内で止まっている? ~~~ $ /opt/solarisstudio12.3/bin/dbx - 1584 For information about new features see `help changes' To remove this message, put `dbxenv suppress_startup_message 7.9' in your .dbxrc Reading ruby Reading ld.so.1 Reading libpthread.so.1 Reading librt.so.1 Reading libgmp.so.10.1.3 Reading libsocket.so.1 Reading libdl.so.1 Reading libcrypt_d.so.1 Reading libm.so.2 Reading libc.so.1 Reading libaio.so.1 Reading libmd.so.1 Reading libnsl.so.1 Reading libgen.so.1 Reading libc_psr.so.1 Reading encdb.so Reading libmp.so.2 Reading libscf.so.1 Reading libdoor.so.1 Reading libuutil.so.1 Reading transdb.so Reading thread.so Attached to process 1584 t@1 (l@1) stopped in ___lwp_mutex_timedlock at 0x7fffffff7fe33f24 0x7fffffff7fe33f24: ___lwp_mutex_timedlock+0x0008: ta %icc,0x0000000000000040 Current function is rb_close_before_exec 5870 ret = fcntl(fd, F_GETFD); /* async-signal-safe */ (dbx) lwps >l@1 running in ___lwp_mutex_timedlock() (dbx) where current thread: t@1 [1] ___lwp_mutex_timedlock(0x7fffffff7ff416d8, 0x0, 0x0, 0x7fffffff7e8d6e6c, 0x1, 0x1), at 0x7fffffff7fe33f24 [2] enter(0x0, 0x7fffffff7ff3c210, 0x1000, 0x7fffffff7e82f294, 0x7fffffff7ff3cdf8, 0x7fffffff7ff3c638), at 0x7fffffff7fe1bf20 [3] elf_bndr(0x7fffffff7fd00848, 0x1580, 0x1001eb110, 0x14, 0x0, 0x7fffffff7ff3cdf8), at 0x7fffffff7fe28168 [4] elf_rtbndr(0x560000, 0x100567438, 0xfffffffffffffff7, 0x1001eb110, 0x1001f1080, 0xfffffffffffffff7), at 0x7fffffff7fe08394 =>[5] rb_close_before_exec(lowfd = 3, maxhint = 10, noclose_fds = 4301789120U), line 5870 in "io.c" [6] proc_exec_cmd(prog = 0x100868980 "/XXXXX-trunk/bin/ruby", argv_str = 4301789280U, envp_str = 0), line 1269 in "process.c" [7] rb_exec_async_signal_safe(eargp = 0x1009d3ea0, errmsg = 0xffffffff7fffc544 "", errmsg_buflen = 80U), line 3068 in "process.c" [8] popen_exec(pp = 0xffffffff7fffc518, errmsg = 0xffffffff7fffc544 "", errmsg_len = 80U), line 5888 in "io.c" [9] retry_fork_async_signal_safe(status = 0xffffffff7fffc594, ep = 0xffffffff7fffc3fc, chfunc = 0x1001518c0 = &`ruby`io.c`popen_exec(void *pp, char *errmsg, size_t errmsg_len), charg = 0xffffffff7fffc518, errmsg = 0xffffffff7fffc544 "", errmsg_buflen = 80U), line 3540 in "process.c" [10] rb_fork_async_signal_safe(status = 0xffffffff7fffc594, chfunc = 0x1001518c0 = &`ruby`io.c`popen_exec(void *pp, char *errmsg, size_t errmsg_len), charg = 0xffffffff7fffc518, fds = 4301789120U, errmsg = 0xffffffff7fffc544 "", errmsg_buflen = 80U), line 3571 in "process.c" [11] pipe_open(execarg_obj = 4301789560U, modestr = 0x10044db18 "r+", fmode = 7, convconfig = 0xffffffff7fffc6e8), line 6006 in "io.c" [12] rb_io_s_popen(argc = 2, argv = 0x10057ca28, klass = 4302377040U), line 6299 in "io.c" [13] call_cfunc_m1(func = 0x1001523c0 = &`ruby`io.c`rb_io_s_popen(int argc, VALUE *argv, VALUE klass), recv = 4302377040U, argc = 2, argv = 0x10057ca28), line 1421 in "vm_insnhelper.c" [14] vm_call_cfunc_with_frame(th = 0x100578240, reg_cfp = 0x10067c208, ci = 0x100868f88), line 1595 in "vm_insnhelper.c" [15] vm_call_cfunc(th = 0x100578240, reg_cfp = 0x10067c208, ci = 0x100868f88), line 1690 in "vm_insnhelper.c" [16] vm_call_method(th = 0x100578240, cfp = 0x10067c208, ci = 0x100868f88), line 1921 in "vm_insnhelper.c" [17] vm_call_general(th = 0x100578240, reg_cfp = 0x10067c208, ci = 0x100868f88), line 2085 in "vm_insnhelper.c" [18] vm_exec_core(th = 0x100578240, initial = 0), line 977 in "insns.def" [19] vm_exec(th = 0x100578240), line 1486 in "vm.c" [20] invoke_block_from_c(th = 0x100578240, block = 0x10067c348, self = 4305731800U, argc = 1, argv = 0xffffffff7fffd6d8, blockptr = (nil), cref = (nil), defined_class = 4305737000U, splattable = 1), line 857 in "vm.c" [21] vm_yield(th = 0x100578240, argc = 1, argv = 0xffffffff7fffd6d8), line 898 in "vm.c" [22] rb_yield_0(argc = 1, argv = 0xffffffff7fffd6d8), line 994 in "vm_eval.c" [23] rb_yield(val = 1U), line 1004 in "vm_eval.c" [24] int_dotimes(num = 3U), line 3951 in "numeric.c" [25] call_cfunc_0(func = 0x1001a1440 = &`ruby`numeric.c`int_dotimes(VALUE num), recv = 3U, argc = 0, argv = 0x10057c9a8), line 1427 in "vm_insnhelper.c" [26] vm_call_cfunc_with_frame(th = 0x100578240, reg_cfp = 0x10067c328, ci = 0x10089c378), line 1595 in "vm_insnhelper.c" [27] vm_call_cfunc(th = 0x100578240, reg_cfp = 0x10067c328, ci = 0x10089c378), line 1690 in "vm_insnhelper.c" [28] vm_call_method(th = 0x100578240, cfp = 0x10067c328, ci = 0x10089c378), line 1921 in "vm_insnhelper.c" [29] vm_call_general(th = 0x100578240, reg_cfp = 0x10067c328, ci = 0x10089c378), line 2085 in "vm_insnhelper.c" [30] vm_exec_core(th = 0x100578240, initial = 0), line 947 in "insns.def" [31] vm_exec(th = 0x100578240), line 1486 in "vm.c" [32] invoke_block_from_c(th = 0x100578240, block = 0x10067c540, self = 4305731800U, argc = 1, argv = 0xffffffff7fffe718, blockptr = (nil), cref = (nil), defined_class = 4305636240U, splattable = 1), line 857 in "vm.c" [33] vm_yield(th = 0x100578240, argc = 1, argv = 0xffffffff7fffe718), line 898 in "vm.c" [34] rb_yield_0(argc = 1, argv = 0xffffffff7fffe718), line 994 in "vm_eval.c" [35] rb_yield(val = 4305221440U), line 1004 in "vm_eval.c" [36] rb_ary_each(array = 4305221080U), line 1821 in "array.c" [37] call_cfunc_0(func = 0x100042900 = &rb_ary_each(VALUE array), recv = 4305221080U, argc = 0, argv = 0x10057c858), line 1427 in "vm_insnhelper.c" [38] vm_call_cfunc_with_frame(th = 0x100578240, reg_cfp = 0x10067c520, ci = 0x10092d448), line 1595 in "vm_insnhelper.c" [39] vm_call_cfunc(th = 0x100578240, reg_cfp = 0x10067c520, ci = 0x10092d448), line 1690 in "vm_insnhelper.c" [40] vm_call_method(th = 0x100578240, cfp = 0x10067c520, ci = 0x10092d448), line 1921 in "vm_insnhelper.c" [41] vm_call_general(th = 0x100578240, reg_cfp = 0x10067c520, ci = 0x10092d448), line 2085 in "vm_insnhelper.c" [42] vm_exec_core(th = 0x100578240, initial = 0), line 947 in "insns.def" [43] vm_exec(th = 0x100578240), line 1486 in "vm.c" [44] rb_iseq_eval_main(iseqval = 4302120600U), line 1733 in "vm.c" [45] ruby_exec_internal(n = 0x1006d2698), line 257 in "eval.c" [46] ruby_exec_node(n = 0x1006d2698), line 322 in "eval.c" [47] ruby_run_node(n = 0x1006d2698), line 314 in "eval.c" [48] main(argc = 7, argv = 0xffffffff7ffff848), line 36 in "main.c" (dbx) list 5870 ret = fcntl(fd, F_GETFD); /* async-signal-safe */ 5871 if (ret != -1 && !(ret & FD_CLOEXEC)) { 5872 fcntl(fd, F_SETFD, ret|FD_CLOEXEC); /* async-signal-safe */ 5873 } 5874 # define CONTIGUOUS_CLOSED_FDS 20 5875 if (ret != -1) { 5876 if (max < fd + CONTIGUOUS_CLOSED_FDS) 5877 max = fd + CONTIGUOUS_CLOSED_FDS; 5878 } 5879 } (dbx) (dbx) print fd fd = 33 (dbx) print max max = 32 (dbx) print lowfd lowfd = 3 ~~~ 5870: ret = fcntl(fd, F_GETFD); は、 5866: for (fd = lowfd; fd <= max; fd++) { ... } の中でループしているから、fd が max を越えるはずはないはずなのに、 max = 32 にもかかわらず fd = 33 で fcntl(fd, F_GETFD) が呼ばれている? ---------------------------------------- Bug #11265: deadlock on Solaris 10 since r50900 https://bugs.ruby-lang.org/issues/11265#change-53000 * Author: Naohisa Goto * Status: Open * Priority: Normal * Assignee: * ruby -v: - * Backport: 2.0.0: UNKNOWN, 2.1: UNKNOWN, 2.2: UNKNOWN ---------------------------------------- Solaris10にて、r50900 以降、 TestParallel::TestParallel#test_jobs_status TestParallel::TestParallel#test_separate のいずれか、または両方で、テストが停止状態となり次に進まなくなります。 スレッドのデッドロックが発生しているような感じです。 ところで、こういう場合の上手いデバッグ方法って何があるでしょうか? -- https://bugs.ruby-lang.org/