[#84280] [Ruby trunk Bug#14181] hangs or deadlocks from waitpid, threads, and trapping SIGCHLD — nobu@...
Issue #14181 has been updated by nobu (Nobuyoshi Nakada).
3 messages
2017/12/15
[#84398] [Ruby trunk Bug#14220] WEBrick changes - failures on MSWIN, MinGW — Greg.mpls@...
Issue #14220 has been reported by MSP-Greg (Greg L).
3 messages
2017/12/22
[#84472] Re: [ruby-dev:50394] [Ruby trunk Bug#14240] warn four special variables: $; $, $/ $\ — Eric Wong <normalperson@...>
Shouldn't English posts be on ruby-core instead of ruby-dev?
3 messages
2017/12/26
[ruby-core:84280] [Ruby trunk Bug#14181] hangs or deadlocks from waitpid, threads, and trapping SIGCHLD
From:
nobu@...
Date:
2017-12-15 07:40:47 UTC
List:
ruby-core #84280
Issue #14181 has been updated by nobu (Nobuyoshi Nakada).
normalperson (Eric Wong) wrote:
> I guess the sleep_wait_for_interrupt path when !forever has the
> same problem and might sleep too long..
How about this patch?
```diff
diff --git i/thread.c w/thread.c
index cc62ea3905..138c26cb09 100644
--- i/thread.c
+++ w/thread.c
@@ -90,9 +90,13 @@ static VALUE sym_on_blocking;
static VALUE sym_never;
static ID id_locals;
-static void sleep_timeval(rb_thread_t *th, struct timeval time, int spurious_check);
-static void sleep_wait_for_interrupt(rb_thread_t *th, double sleepsec, int spurious_check);
-static void sleep_forever(rb_thread_t *th, int nodeadlock, int spurious_check);
+typedef int (*wakeup_cond_func)(void *);
+static void sleep_timeval(rb_thread_t *th, struct timeval time, int spurious_check,
+ wakeup_cond_func wake, void *warg);
+static void sleep_wait_for_interrupt(rb_thread_t *th, double sleepsec, int spurious_check,
+ wakeup_cond_func wake, void *warg);
+static void sleep_forever(rb_thread_t *th, int nodeadlock, int spurious_check,
+ wakeup_cond_func wake, void *warg);
static void rb_thread_sleep_deadly_allow_spurious_wakeup(void);
static double timeofday(void);
static int rb_threadptr_dead(rb_thread_t *th);
@@ -873,6 +877,13 @@ remove_from_join_list(VALUE arg)
return Qnil;
}
+static int
+wake_join(void *arg)
+{
+ rb_thread_t *target_th = arg;
+ return target_th->status == THREAD_KILLED;
+}
+
static VALUE
thread_join_sleep(VALUE arg)
{
@@ -883,13 +894,7 @@ thread_join_sleep(VALUE arg)
while (target_th->status != THREAD_KILLED) {
if (forever) {
- th->status = THREAD_STOPPED_FOREVER;
- th->vm->sleeper++;
- rb_check_deadlock(th->vm);
- native_sleep(th, 0);
- th->vm->sleeper--;
- RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
- th->status = THREAD_RUNNABLE;
+ sleep_forever(th, TRUE, FALSE, wake_join, target_th);
}
else {
double now = timeofday();
@@ -898,7 +903,7 @@ thread_join_sleep(VALUE arg)
thread_id_str(target_th));
return Qfalse;
}
- sleep_wait_for_interrupt(th, limit - now, 0);
+ sleep_wait_for_interrupt(th, limit - now, 0, wake_join, target_th);
}
thread_debug("thread_join: interrupted (thid: %"PRI_THREAD_ID", status: %s)\n",
thread_id_str(target_th), thread_status_name(target_th, TRUE));
@@ -1094,14 +1099,15 @@ double2timeval(double d)
}
static void
-sleep_forever(rb_thread_t *th, int deadlockable, int spurious_check)
+sleep_forever(rb_thread_t *th, int deadlockable, int spurious_check,
+ wakeup_cond_func wake, void *warg)
{
enum rb_thread_status prev_status = th->status;
enum rb_thread_status status = deadlockable ? THREAD_STOPPED_FOREVER : THREAD_STOPPED;
th->status = status;
RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
- while (th->status == status) {
+ while (!(wake && wake(warg)) && th->status == status) {
if (deadlockable) {
th->vm->sleeper++;
rb_check_deadlock(th->vm);
@@ -1135,7 +1141,8 @@ getclockofday(struct timeval *tp)
}
static void
-sleep_timeval(rb_thread_t *th, struct timeval tv, int spurious_check)
+sleep_timeval(rb_thread_t *th, struct timeval tv, int spurious_check,
+ wakeup_cond_func wake, void *warg)
{
struct timeval to, tvn;
enum rb_thread_status prev_status = th->status;
@@ -1156,7 +1163,7 @@ sleep_timeval(rb_thread_t *th, struct timeval tv, int spurious_check)
th->status = THREAD_STOPPED;
RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
- while (th->status == THREAD_STOPPED) {
+ while (!(wake && wake(warg)) && th->status == THREAD_STOPPED) {
native_sleep(th, &tv);
RUBY_VM_CHECK_INTS_BLOCKING(th->ec);
getclockofday(&tvn);
@@ -1180,21 +1187,21 @@ void
rb_thread_sleep_forever(void)
{
thread_debug("rb_thread_sleep_forever\n");
- sleep_forever(GET_THREAD(), FALSE, TRUE);
+ sleep_forever(GET_THREAD(), FALSE, TRUE, NULL, NULL);
}
void
rb_thread_sleep_deadly(void)
{
thread_debug("rb_thread_sleep_deadly\n");
- sleep_forever(GET_THREAD(), TRUE, TRUE);
+ sleep_forever(GET_THREAD(), TRUE, TRUE, NULL, NULL);
}
static void
rb_thread_sleep_deadly_allow_spurious_wakeup(void)
{
thread_debug("rb_thread_sleep_deadly_allow_spurious_wakeup\n");
- sleep_forever(GET_THREAD(), TRUE, FALSE);
+ sleep_forever(GET_THREAD(), TRUE, FALSE, NULL, NULL);
}
static double
@@ -1216,16 +1223,17 @@ timeofday(void)
}
static void
-sleep_wait_for_interrupt(rb_thread_t *th, double sleepsec, int spurious_check)
+sleep_wait_for_interrupt(rb_thread_t *th, double sleepsec, int spurious_check,
+ wakeup_cond_func wake, void *warg)
{
- sleep_timeval(th, double2timeval(sleepsec), spurious_check);
+ sleep_timeval(th, double2timeval(sleepsec), spurious_check, wake, warg);
}
void
rb_thread_wait_for(struct timeval time)
{
rb_thread_t *th = GET_THREAD();
- sleep_timeval(th, time, 1);
+ sleep_timeval(th, time, 1, NULL, NULL);
}
/*
diff --git i/thread_sync.c w/thread_sync.c
index 6eff5e759c..ca74fe5866 100644
--- i/thread_sync.c
+++ w/thread_sync.c
@@ -428,7 +428,7 @@ static VALUE
rb_mutex_wait_for(VALUE time)
{
struct timeval *t = (struct timeval *)time;
- sleep_timeval(GET_THREAD(), *t, 0); /* permit spurious check */
+ sleep_timeval(GET_THREAD(), *t, 0, NULL, NULL); /* permit spurious check */
return Qnil;
}
```
----------------------------------------
Bug #14181: hangs or deadlocks from waitpid, threads, and trapping SIGCHLD
https://bugs.ruby-lang.org/issues/14181#change-68435
* Author: ccutrer (Cody Cutrer)
* Status: Closed
* Priority: Normal
* Assignee:
* Target version:
* ruby -v: ruby 2.4.2p198 (2017-09-14 revision 59899) [x86_64-linux-gnu]
* Backport: 2.3: UNKNOWN, 2.4: UNKNOWN
----------------------------------------
I'm not exactly sure what's going on here, but the end result is basically a thread is getting killed unexpectedly during a waitpid call, when SIGCHLD is being handled. In a more complex scenario, we end up hanging because Thread#join is ends up waiting on a thread that's already dead (presumably because it died in a non-standard way), or in a simpler scenario, the output is:
```
loop 250
loop 251
/usr/lib/ruby/2.4.0/timeout.rb:97:in `join': No live threads left. Deadlock? (fatal)
1 threads, 1 sleeps current:0x00000000019205e0 main thread:0x00000000019205e0
* #<Thread:0x0000000001955e38 sleep_forever>
rb_thread_t:0x00000000019205e0 native:0x00007f900a082700 int:0
/usr/lib/ruby/2.4.0/timeout.rb:97:in `join'
/usr/lib/ruby/2.4.0/timeout.rb:97:in `ensure in block in timeout'
/usr/lib/ruby/2.4.0/timeout.rb:97:in `block in timeout'
/usr/lib/ruby/2.4.0/timeout.rb:33:in `block in catch'
/usr/lib/ruby/2.4.0/timeout.rb:33:in `catch'
/usr/lib/ruby/2.4.0/timeout.rb:33:in `catch'
/usr/lib/ruby/2.4.0/timeout.rb:108:in `timeout'
./test.rb:11:in `<main>'
from /usr/lib/ruby/2.4.0/timeout.rb:97:in `ensure in block in timeout'
from /usr/lib/ruby/2.4.0/timeout.rb:97:in `block in timeout'
from /usr/lib/ruby/2.4.0/timeout.rb:33:in `block in catch'
from /usr/lib/ruby/2.4.0/timeout.rb:33:in `catch'
from /usr/lib/ruby/2.4.0/timeout.rb:33:in `catch'
from /usr/lib/ruby/2.4.0/timeout.rb:108:in `timeout'
from ./test.rb:11:in `<main>'
```
The simpler repro, where I'm obviously not doing anything I shouldn't be doing in the signal handler:
```
#!/usr/bin/env ruby
require 'timeout'
trap(:CHLD) { }
x = 0
while true
puts "loop #{x += 1}"
pid = Process.spawn('sleep 1')
Timeout.timeout(30) do
Process.waitpid(pid)
end
end
```
A slightly more complex repro that I'm still pretty sure what I'm doing in the signal handler is okay, but ends up hanging:
```
#!/usr/bin/env ruby
require 'timeout'
self_pipe = IO.pipe
signal_queue = []
def wake_up(self_pipe)
self_pipe[1].write_nonblock('.', exception: false)
end
trap(:CHLD) { signal_queue << :CHLD; wake_up(self_pipe) }
signal_processor = Thread.new do
loop do
self_pipe[0].read(1)
signal_queue.pop
end
end
x = 0
while true
puts "loop #{x += 1}"
pid = Process.spawn('sleep 1')
Timeout.timeout(30) do
Process.waitpid(pid)
end
end
```
In either case, it can take many loops before it fails, up to a few hundred. I've reproed on both Ubuntu Xenial, and macOS 10.12.6 (the former with ruby 2.4.2, the latter with ruby 2.4.1).
--
https://bugs.ruby-lang.org/
Unsubscribe: <mailto:ruby-core-request@ruby-lang.org?subject=unsubscribe>
<http://lists.ruby-lang.org/cgi-bin/mailman/options/ruby-core>