[#3419] Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...>

Hello list,

19 messages 2004/09/17
[#3422] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3423] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 12:01, ts wrote:

[#3424] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3425] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 12:37, ts wrote:

[#3426] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3428] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 13:05, ts wrote:

[#3429] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

Re: Valgrind analysis of [BUG] unknown node type 0

From: Andrew Walrond <andrew@...>
Date: 2004-09-17 14:16:03 UTC
List: ruby-core #3434
On Friday 17 Sep 2004 13:50, ts wrote:
> >>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:
>
> A> " So if lomem and himem are not the culprits, then it must be 'p' "
>
> static inline int
> is_pointer_to_heap(ptr)
>     void *ptr;
> {
>     register RVALUE *p = RANY(ptr);
>     register RVALUE *heap_org;
>     register long i;
>
>     if (p < lomem || p > himem) return Qfalse;
>
>     /* check if p looks like a pointer */
>     for (i=0; i < heaps_used; i++) {
>         heap_org = heaps[i].slot;
>         if (heap_org <= p && p < heap_org + heaps[i].limit &&
>             ((((char*)p)-((char*)heap_org))%sizeof(RVALUE)) == 0)
>             return Qtrue;
>     }
>     return Qfalse;
> }
>
>  How do you want that p is unitialized ?
>

Lets put in some extra lines to work out which variable valgrind is 
complaining about:

static inline int
is_pointer_to_heap(ptr)
    void *ptr;
{
    register RVALUE *p = RANY(ptr);
    register RVALUE *heap_org;
    register long i;

    if (lomem==0) i=0;
    if (himem==0) i=0;
    if (p==0) i=0;
    if (p < lomem || p > himem) return Qfalse;

    /* check if p looks like a pointer */
    for (i=0; i < heaps_used; i++) {
  heap_org = heaps[i].slot;
  if (heap_org <= p && p < heap_org + heaps[i].limit &&
      ((((char*)p)-((char*)heap_org))%sizeof(RVALUE)) == 0)
      return Qtrue;
    }
    return Qfalse;
}

Recompile ruby; Rerunning valgrind now gives

==20264== Conditional jump or move depends on uninitialised value(s)
==20264==    at 0x806FF25: is_pointer_to_heap (gc.c:593)
==20264==    by 0x806FEE1: mark_locations_array (gc.c:612)
==20264==    by 0x8071110: rb_gc (gc.c:1331)
==20264==    by 0x806FBAC: rb_newobj (gc.c:376)
==20264==

Line 593 is
    if (p==0) i=0;

So Valgrind thinks there is something screwy about the address in p

Why is wasn't picked up mark_locations_array? Lets see...

static void
mark_locations_array(x, n)
    register VALUE *x;
    register long n;
{
    while (n--) {
  if (is_pointer_to_heap((void *)*x)) {
      gc_mark(*x, 0);
  }
  x++;
    }
}

Perhaps *x isn't  _used_ here. Lets put some debug code in this function:

static void
mark_locations_array(x, n)
    register VALUE *x;
    register long n;
{
   long dummy1=n, dummy2;
    while (n--) {
  if (*x == 0) dummy2=n;
  if (is_pointer_to_heap((void *)*x)) {
      gc_mark(*x, 0);
  }
  x++;
    }
}

Rebuild; Now using valgrind to dump us into gdb at the relevant point:

$ valgrind --tool=memcheck --db-attach=yes ./ruby ../rubyx/rubyx
[snip]
==23845== Conditional jump or move depends on uninitialised value(s)
==23845==    at 0x806FEE1: mark_locations_array (gc.c:613)
==23845==    by 0x8071124: rb_gc (gc.c:1333)
==23845==    by 0x806FBAC: rb_newobj (gc.c:376)
==23845==    by 0x805307E: new_dvar (eval.c:751)
==23845==
==23845== ---- Attach to debugger ? --- [Return/N/n/Y/y/C/c] ---- y
starting debugger
==23845== starting debugger with cmd: /bin/gdb -nw /proc/23848/fd/821 23848
[snip]
0x0806fee1 in mark_locations_array (x=0x52bfd0cc, n=31) at gc.c:613
613             if (*x == 0) dummy2=n;
(gdb) l
608         register VALUE *x;
609         register long n;
610     {
611              long dummy1=n, dummy2;
612         while (n--) {
613             if (*x == 0) dummy2=n;
614             if (is_pointer_to_heap((void *)*x)) {
615                 gc_mark(*x, 0);
616             }
617             x++;
(
(gdb) info localsgdb) p n
$1 = 31
(gdb) p dummy1
$2 = 39
(gdb)                                            

As we thought. We have to _use_ the value for valgrind to complain.
But this is interesting. Is mark_locations array being called with a partially 
unitialised array? or is 'n' too big?
Lets see where it's being called from, in rb_gc()

void
rb_gc()
{
    struct gc_list *list;
    struct FRAME * volatile frame; /* gcc 2.7.2.3 -O2 bug??  */
    jmp_buf save_regs_gc_mark;
    SET_STACK_END;

#ifdef HAVE_NATIVETHREAD
    if (!is_ruby_native_thread()) {
  rb_bug("cross-thread violation on rb_gc()");
    }
#endif
    if (dont_gc || during_gc) {
  if (!freelist) {
      add_heap();
  }
  return;
    }
    if (during_gc) return;
    during_gc++;

    init_mark_stack();

    /* mark frame stack */
    for (frame = ruby_frame; frame; frame = frame->prev) {
  rb_gc_mark_frame(frame);
  if (frame->tmp) {
      struct FRAME *tmp = frame->tmp;
      while (tmp) {
    rb_gc_mark_frame(tmp);
    tmp = tmp->prev;
      }
  }
    }
    gc_mark((VALUE)ruby_scope, 0);
    gc_mark((VALUE)ruby_dyna_vars, 0);
    if (finalizer_table) {
  mark_tbl(finalizer_table, 0);
    }

    FLUSH_REGISTER_WINDOWS;
    /* This assumes that all registers are saved into the jmp_buf (and stack) 
*/
    setjmp(save_regs_gc_mark);
    mark_locations_array((VALUE*)save_regs_gc_mark, 
sizeof(save_regs_gc_mark) / sizeof(VALUE *));
[snip]

So it seems likely that save_regs_gc_mark consists of 39 (VALUE*)s, but only 8 
(dummy1-n from our debug code) have been initialised with anything; the rest 
is garbage.

Does somebody who knows this stuff want to volunteer an opinion as to what's 
going on here?

Andrew

In This Thread