[#3419] Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...>

Hello list,

19 messages 2004/09/17
[#3422] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3423] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 12:01, ts wrote:

[#3424] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3425] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 12:37, ts wrote:

[#3426] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3428] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 13:05, ts wrote:

[#3429] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

Re: [PATCH] dir.c --- Dir.chdir error handling

From: "H.Yamamoto" <ocean@...2.ccsnet.ne.jp>
Date: 2004-09-16 03:59:05 UTC
List: ruby-core #3411
>Maybe you mean rb_gc() inside ruby_xmalloc()?

Until now, my concern was just theory, but I could reproduce the problem
in real code... This simple code has potential bug.

  rb_str_intern(rb_str_new2("foo"));


VALUE
rb_str_intern(str)
    VALUE str;
{
    ID id;

    if (!RSTRING(str)->ptr || RSTRING(str)->len == 0) {
	rb_raise(rb_eArgError, "interning empty string");
    }
    if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len)
	rb_raise(rb_eArgError, "symbol string may not contain `\\0'");
    id = rb_intern(RSTRING(str)->ptr);
    return ID2SYM(id);
}

and rb_intern(const char *p) calls strdup() to regist string to hash table.
strdup() is replaced with ruby_strdup() in util.h, and implementation is

// util.c

char *
ruby_strdup(str)
    const char *str;
{
    char *tmp;
    int len = strlen(str) + 1;

    tmp = xmalloc(len);
    memcpy(tmp, str, len);

    return tmp;
}

and implementation of xmalloc() is

// gc.c

void *
ruby_xmalloc(size)
    long size;
{
    void *mem;

    if (size < 0) {
	rb_raise(rb_eNoMemError, "negative allocation size (or too big)");
    }
    if (size == 0) size = 1;
    malloc_increase += size;

    if (malloc_increase > malloc_limit) {
	rb_gc();
    }
    RUBY_CRITICAL(mem = malloc(size));
    if (!mem) {
	rb_gc();
	RUBY_CRITICAL(mem = malloc(size));
	if (!mem) {
	    rb_memerror();
	}
    }

    return mem;
}

Sometimes xmalloc calls rb_gc(). in that case, strdup behaves like this

char *
ruby_strdup(str)
    const char *str;
{
    char *tmp;
    int len = strlen(str) + 1;

    rb_gc();
    tmp = malloc(len);
    memcpy(tmp, str, len);

    return tmp;
}

If rb_str_intern is inlined and arguments of rb_str_intern `str' is erased,
rb_gc() frees memory of `str' wrongly, and memcpy tries to copy from freed
memory.

I could reproduce this problem with VisualC++6 OPTFLAGS=-O2

///////////////////////////////////////////////////
// Code

Index: gc.c
===================================================================
RCS file: /var/cvs/src/ruby/gc.c,v
retrieving revision 1.183
diff -u -w -b -p -r1.183 gc.c
--- gc.c	28 Jul 2004 01:22:48 -0000	1.183
+++ gc.c	16 Sep 2004 03:43:55 -0000
@@ -102,6 +102,8 @@ rb_memerror()
     rb_exc_raise(nomem_error);
 }
 
+extern volatile int hogehoge;
+
 void *
 ruby_xmalloc(size)
     long size;
@@ -113,6 +115,10 @@ ruby_xmalloc(size)
     }
     if (size == 0) size = 1;
     malloc_increase += size;
+
+    if (hogehoge) { /* emulate GC situation */
+	rb_gc();
+    }
 
     if (malloc_increase > malloc_limit) {
 	rb_gc();

Index: string.c
===================================================================
RCS file: /var/cvs/src/ruby/string.c,v
retrieving revision 1.197
diff -u -w -b -p -r1.197 string.c
--- string.c	19 Aug 2004 07:33:14 -0000	1.197
+++ string.c	16 Sep 2004 03:45:09 -0000
@@ -4351,7 +4351,7 @@ rb_str_crypt(str, salt)
  *     'cat and dog'.to_sym   #=> :"cat and dog"
  */
 
-VALUE
+inline VALUE
 rb_str_intern(str)
     VALUE str;
 {
@@ -4585,10 +4585,23 @@ rb_str_setter(val, id, var)
  *     
  */
 
+extern volatile int hogehoge = 0;
+
+static VALUE
+string_s_test(VALUE self)
+{
+    hogehoge = 1; /* triger GC */
+
+    return rb_str_intern(rb_str_new2("foo"));
+}
+
 void
 Init_String()
 {
     rb_cString  = rb_define_class("String", rb_cObject);
+    
+    rb_define_method(rb_cString, "test", string_s_test, 0);
+    
     rb_include_module(rb_cString, rb_mComparable);
     rb_include_module(rb_cString, rb_mEnumerable);
     rb_define_alloc_func(rb_cString, str_alloc);

////////////////////////////////
// Result

E:\ruby-cvs\win32>nmake OPTFLAGS=-O2

(snip)

E:\ruby-cvs\win32>miniruby -e "p 'test'.test"
:8ァ

//////////////////////////////
// Result (hogehoge = 1; in string.c is commented out)


E:\ruby-cvs\win32>nmake OPTFLAGS=-O2

(snip)

E:\ruby-cvs\win32>miniruby -e "p 'test'.test"
:foo


# My opinion is not "rb_str_intern() must protect his argument" but "it's impossible
# to protect all objects this way". If we don't protect VALUE on object creation,
# we must do
#
#     typedef volatile unsigned long VALUE;
#
# When I tried this (and replaced all "volatile VALUE" with "VALUE"),
# I was warned suspicious pointer convertion though.


In This Thread