[#3419] Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...>

Hello list,

19 messages 2004/09/17
[#3422] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3423] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 12:01, ts wrote:

[#3424] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3425] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 12:37, ts wrote:

[#3426] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

[#3428] Re: Valgrind analysis of [BUG] unknown node type 0 — Andrew Walrond <andrew@...> 2004/09/17

On Friday 17 Sep 2004 13:05, ts wrote:

[#3429] Re: Valgrind analysis of [BUG] unknown node type 0 — ts <decoux@...> 2004/09/17

>>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:

Re: Valgrind analysis of [BUG] unknown node type 0

From: Andrew Walrond <andrew@...>
Date: 2004-09-17 14:02:32 UTC
List: ruby-core #3432
On Friday 17 Sep 2004 13:50, ts wrote:
> >>>>> "A" == Andrew Walrond <andrew@walrond.org> writes:
>
> A> " So if lomem and himem are not the culprits, then it must be 'p' "
>
> static inline int
> is_pointer_to_heap(ptr)
>     void *ptr;
> {
>     register RVALUE *p = RANY(ptr);
>     register RVALUE *heap_org;
>     register long i;
>
>     if (p < lomem || p > himem) return Qfalse;
>
>     /* check if p looks like a pointer */
>     for (i=0; i < heaps_used; i++) {
>         heap_org = heaps[i].slot;
>         if (heap_org <= p && p < heap_org + heaps[i].limit &&
>             ((((char*)p)-((char*)heap_org))%sizeof(RVALUE)) == 0)
>             return Qtrue;
>     }
>     return Qfalse;
> }
>
>  How do you want that p is unitialized ?
>

Lets put in some extra lines to work out which variable valgrind is 
complaining about:

static inline int
is_pointer_to_heap(ptr)
    void *ptr;
{
    register RVALUE *p = RANY(ptr);
    register RVALUE *heap_org;
    register long i;

    if (lomem==0) i=0;
    if (himem==0) i=0;
    if (p==0) i=0;
    if (p < lomem || p > himem) return Qfalse;

    /* check if p looks like a pointer */
    for (i=0; i < heaps_used; i++) {
  heap_org = heaps[i].slot;
  if (heap_org <= p && p < heap_org + heaps[i].limit &&
      ((((char*)p)-((char*)heap_org))%sizeof(RVALUE)) == 0)
      return Qtrue;
    }
    return Qfalse;
}

Recompile ruby; Rerunning valgrind now gives

==20264== Conditional jump or move depends on uninitialised value(s)
==20264==    at 0x806FF25: is_pointer_to_heap (gc.c:593)
==20264==    by 0x806FEE1: mark_locations_array (gc.c:612)
==20264==    by 0x8071110: rb_gc (gc.c:1331)
==20264==    by 0x806FBAC: rb_newobj (gc.c:376)
==20264==

Line 593 is
    if (p==0) i=0;

So Valgrind thinks there is something screwy about the address in p

Why is wasn't picked up mark_locations_array? Lets see...

static void
mark_locations_array(x, n)
    register VALUE *x;
    register long n;
{
    while (n--) {
  if (is_pointer_to_heap((void *)*x)) {
      gc_mark(*x, 0);
  }
  x++;
    }
}

Perhaps *x isn't  _used_ here. Lets put some debug code in this function:

static void
mark_locations_array(x, n)
    register VALUE *x;
    register long n;
{
   long dummy1=n, dummy2;
    while (n--) {
  if (*x == 0) dummy2=n;
  if (is_pointer_to_heap((void *)*x)) {
      gc_mark(*x, 0);
  }
  x++;
    }
}

Rebuild; Now using valgrind to dump us into gdb at the relevant point:

$ valgrind --tool=memcheck --db-attach=yes ./ruby ../rubyx/rubyx
[snip]
==23845== Conditional jump or move depends on uninitialised value(s)
==23845==    at 0x806FEE1: mark_locations_array (gc.c:613)
==23845==    by 0x8071124: rb_gc (gc.c:1333)
==23845==    by 0x806FBAC: rb_newobj (gc.c:376)
==23845==    by 0x805307E: new_dvar (eval.c:751)
==23845==
==23845== ---- Attach to debugger ? --- [Return/N/n/Y/y/C/c] ---- y
starting debugger
==23845== starting debugger with cmd: /bin/gdb -nw /proc/23848/fd/821 23848
[snip]
0x0806fee1 in mark_locations_array (x=0x52bfd0cc, n=31) at gc.c:613
613             if (*x == 0) dummy2=n;
(gdb) l
608         register VALUE *x;
609         register long n;
610     {
611              long dummy1=n, dummy2;
612         while (n--) {
613             if (*x == 0) dummy2=n;
614             if (is_pointer_to_heap((void *)*x)) {
615                 gc_mark(*x, 0);
616             }
617             x++;
(gdb) info locals
dummy1 = 39
dummy2 = 32
(gdb) p x
$1 = (VALUE *) 0x52bfd0cc
(gdb) p *x
$2 = 134706217
(gdb)                                            

As we thought. We have to _use_ the value for valgrind to complain.
But this is interesting. Is mark_locations array being called with a partially 
unitialised array? or is 'n' too big?
Lets see where it's being called from, in rb_gc()

void
rb_gc()
{
    struct gc_list *list;
    struct FRAME * volatile frame; /* gcc 2.7.2.3 -O2 bug??  */
    jmp_buf save_regs_gc_mark;
    SET_STACK_END;

#ifdef HAVE_NATIVETHREAD
    if (!is_ruby_native_thread()) {
  rb_bug("cross-thread violation on rb_gc()");
    }
#endif
    if (dont_gc || during_gc) {
  if (!freelist) {
      add_heap();
  }
  return;
    }
    if (during_gc) return;
    during_gc++;

    init_mark_stack();

    /* mark frame stack */
    for (frame = ruby_frame; frame; frame = frame->prev) {
  rb_gc_mark_frame(frame);
  if (frame->tmp) {
      struct FRAME *tmp = frame->tmp;
      while (tmp) {
    rb_gc_mark_frame(tmp);
    tmp = tmp->prev;
      }
  }
    }
    gc_mark((VALUE)ruby_scope, 0);
    gc_mark((VALUE)ruby_dyna_vars, 0);
    if (finalizer_table) {
  mark_tbl(finalizer_table, 0);
    }

    FLUSH_REGISTER_WINDOWS;
    /* This assumes that all registers are saved into the jmp_buf (and stack) 
*/
    setjmp(save_regs_gc_mark);
    mark_locations_array((VALUE*)save_regs_gc_mark, 
sizeof(save_regs_gc_mark) / sizeof(VALUE *));
[snip]

So it seems likely that save_regs_gc_mark consists of 39 (VALUE*)s, but only 7 
(dummy1-dummy2 from our debug code) have been initialised with anything.

Does somebody who knows this stuff want to volunteer an opinion as to what's 
going on here?

Andrew

In This Thread