[#37679] [FEATURE:trunk] EncDet again — "Yugui (Yuki Sonoda)" <yugui@...>

Yuguiです。

23 messages 2009/01/03

[#37748] $LOAD_PATHとバージョンの運用の関係 — akira yamada / やまだあきら <akira@...>

1.9系でのバージョンの運用と$LOAD_PATHの値について質問です。

12 messages 2009/01/09
[#37758] Re: $LOAD_PATHとバージョンの運用の関係 — "NARUSE, Yui" <naruse@...> 2009/01/11

成瀬です。

[ruby-dev:37784] Re: String#hash

From: Nobuyoshi Nakada <nobu@...>
Date: 2009-01-18 20:28:18 UTC
List: ruby-dev #37784
なかだです。

At Sat, 17 Jan 2009 16:33:08 +0900,
Yukihiro Matsumoto wrote in [ruby-dev:37779]:
> |例えば, Complexだと
> |
> |  @real.hash ^ @imag.hash
> |
> |となっています. 実際には, これはあまり良くなくて, @real==@image のとき
> |つねに同じ値になってしまいます. そこで, 別のseedを与え
> |
> |  @real.hash(seed1) ^ @image.hash(seed2)
> |
> |の様にすると, 上記の問題はなくなります. これは, Array等のhash関数でも
> |同様です. また, Fixnum#hashとObject#hashも別のseedを与えるのもよさそう
> |な気がします.
> 
> 毎回変わる理由は[ruby-dev:37778]の通りなんですが、各オブジェ
> クトのhashの計算方式を更新した方が良いという提案はそれとは独
> 立に有効だと思います。どんな計算式が良いのか議論していただけ
> ませんか?

ComplexやRationalはrb_memhash()が使えると思いますが、可変長であ
るArrayやObject、Structなどでは多少工夫が必要でしょう。

しかし、MurmurHashって途中でたまたま0になるとそれ以降のデータは
無関係になるような気がするんですが、いいんでしょうか。


Index: array.c
===================================================================
--- array.c	(revision 21650)
+++ array.c	(working copy)
@@ -2722,10 +2722,10 @@ recursive_hash(VALUE ary, VALUE dummy, i
 	return LONG2FIX(0);
     }
-    h = RARRAY_LEN(ary);
+    h = rb_hash_start(RARRAY_LEN(ary));
     for (i=0; i<RARRAY_LEN(ary); i++) {
-	h = (h << 1) | (h<0 ? 1 : 0);
 	n = rb_hash(RARRAY_PTR(ary)[i]);
-	h ^= NUM2LONG(n);
+	h = rb_hash_uint(h, NUM2LONG(n));
     }
+    h = rb_hash_end(h);
     return LONG2FIX(h);
 }
Index: complex.c
===================================================================
--- complex.c	(revision 21650)
+++ complex.c	(working copy)
@@ -23,5 +23,5 @@ VALUE rb_cComplex;
 
 static ID id_abs, id_abs2, id_arg, id_cmp, id_conj, id_convert,
-    id_denominator, id_divmod, id_equal_p, id_expt, id_floor, id_hash,
+    id_denominator, id_divmod, id_equal_p, id_expt, id_floor,
     id_idiv, id_inspect, id_negate, id_numerator, id_polar, id_quo,
     id_real_p, id_to_f, id_to_i, id_to_r, id_to_s;
@@ -154,6 +154,4 @@ f_sub(VALUE x, VALUE y)
 }
 
-binop(xor, '^')
-
 fun1(abs)
 fun1(abs2)
@@ -162,5 +160,4 @@ fun1(conj)
 fun1(denominator)
 fun1(floor)
-fun1(hash)
 fun1(inspect)
 fun1(negate)
@@ -858,6 +855,15 @@ static VALUE
 nucomp_hash(VALUE self)
 {
+    long v, h[3];
+    VALUE n;
+
     get_dat1(self);
-    return f_xor(f_hash(dat->real), f_hash(dat->imag));
+    h[0] = rb_hash(rb_obj_class(self));
+    n = rb_hash(dat->real);
+    h[1] = NUM2LONG(n);
+    n = rb_hash(dat->imag);
+    h[2] = NUM2LONG(n);
+    v = rb_memhash(h, sizeof(h));
+    return LONG2FIX(v);
 }
 
@@ -1383,5 +1389,4 @@ Init_Complex(void)
     id_expt = rb_intern("**");
     id_floor = rb_intern("floor");
-    id_hash = rb_intern("hash");
     id_idiv = rb_intern("div");
     id_inspect = rb_intern("inspect");
Index: hash.c
===================================================================
--- hash.c	(revision 21650)
+++ hash.c	(working copy)
@@ -70,5 +70,5 @@ rb_any_hash(VALUE a)
       case T_FIXNUM:
       case T_SYMBOL:
-	hnum = (int)a;
+	hnum = rb_hash_end(rb_hash_start((unsigned int)a));
 	break;
 
@@ -1511,7 +1511,10 @@ static int
 hash_i(VALUE key, VALUE val, int *hval)
 {
+    VALUE n;
     if (key == Qundef) return ST_CONTINUE;
-    *hval ^= rb_hash(key);
-    *hval ^= rb_hash(val);
+    n = rb_hash(key);
+    *hval = rb_hash_uint(*hval, NUM2LONG(n));
+    n = rb_hash(val);
+    *hval = rb_hash_uint(*hval, NUM2LONG(n));
     return ST_CONTINUE;
 }
@@ -1527,6 +1530,7 @@ recursive_hash(VALUE hash, VALUE dummy, 
     if (!RHASH(hash)->ntbl)
         return LONG2FIX(0);
-    hval = RHASH(hash)->ntbl->num_entries;
+    hval = rb_hash_start(RHASH(hash)->ntbl->num_entries);
     rb_hash_foreach(hash, hash_i, (st_data_t)&hval);
+    hval = rb_hash_end(hval);
     return INT2FIX(hval);
 }
Index: gc.c
===================================================================
--- gc.c	(revision 21650)
+++ gc.c	(working copy)
@@ -2904,5 +2904,4 @@ Init_GC(void)
     OBJ_FREEZE(nomem_error);
 
-    rb_define_method(rb_mKernel, "hash", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
Index: object.c
===================================================================
--- object.c	(revision 21650)
+++ object.c	(working copy)
@@ -96,4 +96,12 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
 }
 
+VALUE
+rb_obj_hash(VALUE obj)
+{
+    VALUE oid = rb_obj_id(obj);
+    unsigned h = rb_hash_end(rb_hash_start(NUM2LONG(oid)));
+    return LONG2NUM(h);
+}
+
 /*
  *  call-seq:
@@ -2517,4 +2525,5 @@ Init_Object(void)
     rb_define_method(rb_mKernel, "!~", rb_obj_not_match, 1);
     rb_define_method(rb_mKernel, "eql?", rb_obj_equal, 1);
+    rb_define_method(rb_mKernel, "hash", rb_obj_hash, 0);
 
     rb_define_method(rb_mKernel, "class", rb_obj_class, 0);
Index: range.c
===================================================================
--- range.c	(revision 21650)
+++ range.c	(working copy)
@@ -214,12 +214,14 @@ static VALUE
 range_hash(VALUE range)
 {
-    long hash = EXCL(range);
+    unsigned hash = EXCL(range);
     VALUE v;
 
+    hash = rb_hash_start(hash);
     v = rb_hash(RANGE_BEG(range));
-    hash ^= v << 1;
+    hash = rb_hash_uint(hash, NUM2LONG(v));
     v = rb_hash(RANGE_END(range));
-    hash ^= v << 9;
-    hash ^= EXCL(range) << 24;
+    hash = rb_hash_uint(hash, NUM2LONG(v));
+    hash = rb_hash_uint(hash, EXCL(range) << 24);
+    hash = rb_hash_end(hash);
 
     return LONG2FIX(hash);
Index: rational.c
===================================================================
--- rational.c	(revision 21650)
+++ rational.c	(working copy)
@@ -28,5 +28,5 @@ VALUE rb_cRational;
 
 static ID id_abs, id_cmp, id_convert, id_equal_p, id_expt, id_floor,
-    id_hash, id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
+    id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
     id_to_i, id_to_s, id_truncate;
 
@@ -136,9 +136,6 @@ f_sub(VALUE x, VALUE y)
 }
 
-binop(xor, '^')
-
 fun1(abs)
 fun1(floor)
-fun1(hash)
 fun1(inspect)
 fun1(integer_p)
@@ -1162,6 +1159,15 @@ static VALUE
 nurat_hash(VALUE self)
 {
+    long v, h[3];
+    VALUE n;
+
     get_dat1(self);
-    return f_xor(f_hash(dat->num), f_hash(dat->den));
+    h[0] = rb_hash(rb_obj_class(self));
+    n = rb_hash(dat->num);
+    h[1] = NUM2LONG(n);
+    n = rb_hash(dat->den);
+    h[2] = NUM2LONG(n);
+    v = rb_memhash(h, sizeof(h));
+    return LONG2FIX(v);
 }
 
@@ -1555,5 +1561,4 @@ Init_Rational(void)
     id_expt = rb_intern("**");
     id_floor = rb_intern("floor");
-    id_hash = rb_intern("hash");
     id_idiv = rb_intern("div");
     id_inspect = rb_intern("inspect");
Index: string.c
===================================================================
--- string.c	(revision 21650)
+++ string.c	(working copy)
@@ -1883,10 +1883,17 @@ rb_str_concat(VALUE str1, VALUE str2)
 
 /* MurmurHash described in http://murmurhash.googlepages.com/ */
-static unsigned int
-hash(const unsigned char * data, int len, unsigned int h)
+static inline uint32_t
+murmur(unsigned int h, const int r)
 {
     const unsigned int m = 0x7fd652ad;
-    const int r = 16;
+    h *= m;
+    return h ^ (h >> r);
+}
+
+#define murmur16(h) murmur(h, 16)
 
+static unsigned int
+hash(const unsigned char * data, int len, unsigned int h)
+{
     h += 0xdeadbeef;
 
@@ -1929,7 +1936,5 @@ hash(const unsigned char * data, int len
 		t = (t >> sr) | (d << sl);
 #endif
-		h += t;
-		h *= m;
-		h ^= h >> r;
+		h = murmur16(h + t);
 		t = d;
 
@@ -1954,6 +1959,5 @@ hash(const unsigned char * data, int len
 		h += (t >> sr) | (d << sl);
 #endif
-		h *= m;
-		h ^= h >> r;
+		h = murmur16(h);
 	    }
 
@@ -1965,8 +1969,5 @@ hash(const unsigned char * data, int len
 	{
 	    do {
-		h += *(uint32_t *)data;
-		h *= m;
-		h ^= h >> r;
-
+		h = murmur16(h + *(uint32_t *)data);
 		data += 4;
 		len -= 4;
@@ -1991,18 +1992,57 @@ hash(const unsigned char * data, int len
 	h += data[0];
 #endif
-	h *= m;
-	h ^= h >> r;
+	h = murmur16(h);
     }
 
-    h *= m;
-    h ^= h >> 10;
-    h *= m;
-    h ^= h >> 17;
+    return rb_hash_end(h);
+}
+
+unsigned int
+rb_hash_uint32(unsigned int h, unsigned int i)
+{
+    return murmur(h + i, 16);
+}
+
+unsigned int
+rb_hash_uint(unsigned int h, unsigned int i)
+{
+    unsigned int v = 0;
+    h += i;
+#ifdef WORDS_BIGENDIAN
+#if SIZEOF_INT*CHAR_BIT > 12*8
+    v = murmur16(v + (h >> 12*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 8*8
+    v = murmur16(v + (h >> 8*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 4*8
+    v = murmur16(v + (h >> 4*8));
+#endif
+#endif
+    v = murmur16(v + h);
+#ifndef WORDS_BIGENDIAN
+#if SIZEOF_INT*CHAR_BIT > 4*8
+    v = murmur16(v + (h >> 4*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 8*8
+    v = murmur16(v + (h >> 8*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 12*8
+    v = murmur16(v + (h >> 12*8));
+#endif
+#endif
+    return v;
+}
 
+unsigned int
+rb_hash_end(unsigned int h)
+{
+    h = murmur(h, 10);
+    h = murmur(h, 17);
     return h;
 }
 
-int
-rb_memhash(const void *ptr, long len)
+unsigned int
+rb_hash_start(unsigned int h)
 {
     static int hashseed_init = 0;
@@ -2013,6 +2053,11 @@ rb_memhash(const void *ptr, long len)
         hashseed_init = 1;
     }
+    return hashseed + h;
+}
 
-    return hash(ptr, len, hashseed);
+int
+rb_memhash(const void *ptr, long len)
+{
+    return hash(ptr, len, rb_hash_start(0));
 }
 
Index: struct.c
===================================================================
--- struct.c	(revision 21650)
+++ struct.c	(working copy)
@@ -805,14 +805,15 @@ static VALUE
 rb_struct_hash(VALUE s)
 {
-    long i, h;
+    long i;
+    unsigned h;
     VALUE n;
 
-    h = rb_hash(rb_obj_class(s));
+    h = rb_hash_start(rb_hash(rb_obj_class(s)));
     for (i = 0; i < RSTRUCT_LEN(s); i++) {
-	h = (h << 1) | (h<0 ? 1 : 0);
 	n = rb_hash(RSTRUCT_PTR(s)[i]);
-	h ^= NUM2LONG(n);
+	h = rb_hash_uint(h, NUM2LONG(n));
     }
-    return LONG2FIX(h);
+    h = rb_hash_end(h);
+    return INT2FIX(h);
 }
 
Index: time.c
===================================================================
--- time.c	(revision 21650)
+++ time.c	(working copy)
@@ -1184,5 +1184,5 @@ time_hash(VALUE time)
 
     GetTimeval(time, tobj);
-    hash = tobj->ts.tv_sec ^ tobj->ts.tv_nsec;
+    hash = rb_hash_end(rb_hash_uint(rb_hash_start(tobj->ts.tv_sec), tobj->ts.tv_nsec));
     return LONG2FIX(hash);
 }
Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h	(revision 21650)
+++ include/ruby/intern.h	(working copy)
@@ -607,4 +607,8 @@ VALUE rb_str_append(VALUE, VALUE);
 VALUE rb_str_concat(VALUE, VALUE);
 int rb_memhash(const void *ptr, long len);
+unsigned int rb_hash_start(unsigned int);
+unsigned int rb_hash_uint32(unsigned int, unsigned int);
+unsigned int rb_hash_uint(unsigned int, unsigned int);
+unsigned int rb_hash_end(unsigned int);
 int rb_str_hash(VALUE);
 int rb_str_hash_cmp(VALUE,VALUE);


-- 
--- 僕の前にBugはない。
--- 僕の後ろにBugはできる。
    中田 伸悦

In This Thread