ruby-dev

成瀬です。

現在の rb_memsearch は Karp-Rabin を使っています。
導入の際に Karp-Rabin についておっしゃっていた通り、
確かにパターンが短い場合でも長い場合でもそれなりの性能を示しています。

しかし、パターンの長さによってアルゴリズムの向き不向きが変わるのならば、
長さによってアルゴリズムを変えてはどうでしょう。
具体的にはパターンが 1 バイトの場合はリニアサーチが最速です。
逆に、WORD幅 - 1 バイトを超える場合にはどんどん飛ばしていく BM 系の方が速いでしょう。


さて、今回はパターンを5種類に分け、
1. m > n, m == n, m == 0 等 #=> それぞれ計算
2. m == 1 #=> リニアサーチ
3. 2 <= m <= SIZEOF_VALUE #=> 単純にシフトして比較
4. SIZEOF_VALUE < m <= SIZEOF_VALUE * CHAR_BIT #=> Shift Or
5. SIZEOF_VALUE * CHAR_BIT < m #=> Quick Search
で試してました。

「単純にシフトして比較」は hashing しない Karp-Rabin のような感じです。
1 WORD にバイト列をそのまま、32bit システムなら 4 文字、
64bit システムなら 8 文字格納して比較します。
このレベルの処理だとテーブル参照のコストが以外と効いてくるので、
それなりに差が出てきました。

2. や 3. をいちいち別にする必要があるかという議論はあると思いますが、
rb_memsearch の主たる用途である String#index がもっぱら数文字を引数として
呼ばれることを考えれば、妥当ではないかなぁと。
2. と 3. はだいたい今の 1.5 倍くらいになります。

4. は Shift-Or が効くかと思ったんですが、
実際にはもう Quick Search の方が速くなっていました。

5. は非常に高速なのですが そのまま実装すると UTF-8 の場合が問題でして、
例えば、ひらがなの中からひらがなを探すとかのような、
同じ 1/2 ブロック内での検索だと 3 バイトのうちの 2 バイトがいたるところで
ヒットするので、期待したほどの速度がでなくなります。
そのため、UTF-8 の場合には文字単位で比較するようにしています。
この対策の有無で、単純な Quick Search の最悪のケースから 3 倍ほど変わります。
# 壊れた UTF-8 文字列でもちゃんと動くはずです


これらの対策を施した場合のベンチマークは以下のとおりです。
rad.rb        user     system      total        real
ruby-18  2.140625   0.007812   2.148438 (  2.153251)
ruby-19  2.171875   0.007812   2.179688 (  2.180716)
patched  0.343750   0.007812   0.351562 (  0.358093)

rad8.rb        user     system      total        real
ruby-19  79.687500   0.093750  79.781250 ( 79.725523)
patched  47.640625   0.085938  47.726562 ( 47.701325)

rad.rb は [ruby-core:15508] で Radarek さんが出していたベンチマークです。
この時点で現在同様 Ruby 1.9 は 1.8 と同等の速度になっていましたが、
これがさらに 7 倍速くなります。

rad8.rb は対象のテキストを Ruby 1.9 添付の README.EXT.ja を、
UTF-8 に変換したデータに変えたものです。
こちらでは 1.5 倍になっています。

添付のパッチには一応参考までに Shift-Or を用いたバージョンもつけておきました。

場合分けの手間に見合った速度の向上はあると思うのですが、いかがでしょう。

-- 
NARUSE, Yui  <naruse@airemix.com>
DBDB A476 FDBD 9450 02CD 0EFC BCE3 C388 472E C1EA

Attachments (1)

memsearch.patch (6.46 KB, text/x-diff)

--- include/ruby/encoding.h	(revision 15787)
+++ include/ruby/encoding.h	(working copy)
@@ -176,5 +176,6 @@ int rb_usascii_encindex(void);
 VALUE rb_enc_default_external(void);
 void rb_enc_set_default_external(VALUE encoding);
 VALUE rb_locale_charmap(VALUE klass);
+long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
 
 #endif /* RUBY_ENCODING_H */
--- include/ruby/intern.h	(revision 15787)
+++ include/ruby/intern.h	(working copy)
@@ -466,7 +466,6 @@ double rb_genrand_real(void);
 /* re.c */
 #define rb_memcmp memcmp
 int rb_memcicmp(const void*,const void*,long);
-long rb_memsearch(const void*,long,const void*,long);
 VALUE rb_reg_nth_defined(int, VALUE);
 VALUE rb_reg_nth_match(int, VALUE);
 VALUE rb_reg_last_match(VALUE);
--- re.c	(revision 15787)
+++ re.c	(working copy)
@@ -95,41 +95,202 @@ rb_memcmp(const void *p1, const void *p2
     return memcmp(p1, p2, len);
 }
 
-long
-rb_memsearch(const void *x0, long m, const void *y0, long n)
+static inline long
+rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
 {
-    const unsigned char *x = x0, *y = y0;
-    const unsigned char *s, *e;
-    long i;
-    int d;
-    unsigned long hx, hy;
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+#ifndef VALUE_MAX
+# if SIZEOF_VALUE == 8
+#  define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
+# elif SIZEOF_VALUE == 4
+#  define VALUE_MAX 0xFFFFFFFFUL
+# endif
+#endif
+    VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
+
+    if (m > SIZEOF_VALUE)
+	rb_bug("!!too long pattern string!!");
+
+    	/* Prepare hash value */
+    for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
+	hx <<= CHAR_BIT;
+	hy <<= CHAR_BIT;
+	hx |= *x;
+	hy |= *y;
+    }
+    /* Searching */
+    while (hx != hy) {
+	if (y == ye)
+	    return -1;
+	hy <<= CHAR_BIT;
+	hy |= *y;
+	hy &= mask;
+	y++;
+    }
+    return y - ys - m;
+}
 
-#define KR_REHASH(a, b, h) (((h) << 1) - (((unsigned long)(a))<<d) + (b))
+static inline long
+rb_memsearch_so(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+    VALUE i, lim, state, table[256];
 
-    if (m > n) return -1;
-    s = y; e = s + n - m;
+    /* Preprocessing */
+    for (i = 0; i < 256; ++i)
+	table[i] = ~0;
+    for (lim = 0, i = 1; x < xe; ++x, i <<= 1) {
+	table[*x] &= ~i;
+	lim |= i;
+    }
+    lim = ~(lim>>1);
+    /* Searching */
+    for (state = ~0; y < ye; ++y) {
+	state <<= 1;
+	state |= table[*y];
+	if (state < lim)
+	    return (y - ys - m + 1);
+    }
+    return -1;
+}
+
+static inline long
+rb_memsearch_kr(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+    VALUE hx, hy;
+    long d = sizeof(hx) * CHAR_BIT - 1;
+#define KR_REHASH(a, b, h) (((h) << 1) - (((VALUE)(a))<<d) + (b))
 
     /* Preprocessing */
     /* computes d = 2^(m-1) with
        the left-shift operator */
-    d = sizeof(hx) * CHAR_BIT - 1;
     if (d > m) d = m;
+    /* Prepare hash value */
+    for (hy = hx = 0; x < xe; ++x, ++y) {
+	hx = KR_REHASH(0, *x, hx);
+	hy = KR_REHASH(0, *y, hy);
+    }
+    /* Searching */
+    while (hx != hy || memcmp(xs, y - m, m)) {
+    	if (y == ye)
+	    return -1;
+	hy = KR_REHASH(*(y-d), *y, hy);
+	++y;
+    }
+    return y - ys;
+}
 
-    if (n == m) {
-	return memcmp(x, s, m) == 0 ? 0 : -1;
+static inline long
+rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+    VALUE i, qstable[256];
+
+    /* Preprocessing */
+    for (i = 0; i < 256; ++i)
+	qstable[i] = m + 1;
+    for (; x < xe; ++x)
+	qstable[*x] = xe - x;
+    /* Searching */
+    for (; y < ye; y += *(qstable + y[m])) {
+	if (*xs == *y && memcmp(xs, y, m) == 0)
+	    return y - ys;
+    }
+    return -1;
+}
+
+static inline unsigned int
+rb_memsearch_qs_utf8_hash(const unsigned char *x)
+{
+    register const unsigned int mix = 8353;
+    register unsigned int h = *x;
+    if (h < 0xC0) {
+	return h + 256;
+    }
+    else if (h < 0xE0) {
+	h *= mix;
+	h += x[1];
+    }
+    else if (h < 0xF0) {
+	h *= mix;
+	h += x[1];
+	h *= mix;
+	h += x[2];
+    }
+    else if (h < 0xF5) {
+	h *= mix;
+	h += x[1];
+	h *= mix;
+	h += x[2];
+	h *= mix;
+	h += x[3];
     }
-    /* Prepare hash value */
-    for (hy = hx = i = 0; i < d; ++i) {
-	hx = KR_REHASH(0, x[i], hx);
-	hy = KR_REHASH(0, s[i], hy);
+    else {
+	return h + 256;
+    }
+    return (unsigned char)h;
+}
+
+static inline long
+rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+    const unsigned char *x = xs, *xe = xs + m;
+    const unsigned char *y = ys, *ye = ys + n;
+    VALUE i, qstable[512];
+
+    /* Preprocessing */
+    for (i = 0; i < 512; ++i) {
+	qstable[i] = m + 1;
+    }
+    for (; x < xe; ++x) {
+	qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
     }
     /* Searching */
-    while (hx != hy || memcmp(x, s, m)) {
-	if (s >= e) return -1;
-	hy = KR_REHASH(*s, *(s+d), hy);
-	s++;
+    for (; y < ye; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
+	if (*xs == *y && memcmp(xs, y, m) == 0)
+	    return y - ys;
+    }
+    return -1;
+}
+
+long
+rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
+{
+    const unsigned char *x = x0, *y = y0;
+
+    if (m > n) return -1;
+    else if (m == n) {
+	return memcmp(x0, y0, m) == 0 ? 0 : -1;
+    }
+    else if (m < 1) {
+	return 0;
+    }
+    else if (m == 1) {
+	const unsigned char *ys = y, *ye = ys + n;
+	for (; y < ye; ++y) {
+	    if (*x == *y)
+		return y - ys;
+	}
+	return -1;
+    }
+    else if (m <= SIZEOF_VALUE) {
+	return rb_memsearch_ss(x0, m, y0, n);
+//    }
+//    else if (m <= SIZEOF_VALUE * CHAR_BIT) {
+//	//return rb_memsearch_kr(x0, m, y0, n);
+//	return rb_memsearch_so(x0, m, y0, n);
+    }
+    else if (enc == rb_utf8_encoding()){
+	return rb_memsearch_qs_utf8(x0, m, y0, n);
+    }
+    else {
+	return rb_memsearch_qs(x0, m, y0, n);
     }
-    return s-y;
 }
 
 #define REG_LITERAL FL_USER5
--- string.c	(revision 15787)
+++ string.c	(working copy)
@@ -2088,7 +2088,7 @@ rb_str_index(VALUE str, VALUE sub, long 
     len = RSTRING_LEN(str) - offset;
     for (;;) {
 	char *t;
-	pos = rb_memsearch(sptr, slen, s, len);
+	pos = rb_memsearch(sptr, slen, s, len, enc);
 	if (pos < 0) return pos;
 	t = rb_enc_right_char_head(s, s+pos, enc);
 	if (t == s + pos) break;

Thread

Prev Next

In This Thread

Prev Next

[#33946] Symbol#inspect should use rb_str_inspect — sheepman <sheepman@...>

[#33947] Ruby 1.9.0-1 snapshot released — Yukihiro Matsumoto <matz@...>

[#33948] Schedule for the 1.8.7 release — "Akinori MUSHA" <knu@...>

[#33950] Ruby 1.9.0-1 build failure: error: `r15660' undeclared — "Hiroshi Ichikawa" <gimite@...>

[#33952] Float('1__1') — Tadayoshi Funaba <tadf@...>

[#33954] Mac OS X 10.5.2でruby-1.9.0-1のmake testが失敗 — 高尾 宏治 <kouji@...>

[#33955] --encoding affects script encoding — sheepman <sheepman@...>

[#33962] Ruby1.9.0でのインタプリタ組み込みについての質問 — Masayuki Yamaguchi <Yamaguchi.Masayuki@...>

[#33966] Re: [ruby-cvs:22881] Ruby:r15644 (trunk): * test/ruby/test_m17n_comb.rb (TestM17NComb::test_str_chomp): test — Tanaka Akira <akr@...>

[#33967] $SAFE=1でLoadErrorではなくInsecure operationになる — "Ken Date" <itacchi@...>

[#33972] なぜ transcode.c の transcode_loop で from_utf8 が特別扱い — Martin Duerst <duerst@...>

[#33974] Test::Unit::Collector::Dirがtest_*.rb以外集めてくれない — "Ken Date" <itacchi@...>

[#33983] Re: [ruby-cvs:22913] Re: Ruby:r15674 (trunk): * gc.c (add_heap): sort heaps array in ascending order to use — Yukihiro Matsumoto <matz@...>

[#33989] Hash#compare_by_identity breaks commutativity of Hash#== — Tanaka Akira <akr@...>

[#33991] Maybe IRB bug!! by 09 — Tanaka Akira <akr@...>

[#33992] debug.rb:221:in `debug_command': undefined method `callcc' for #<DEBUGGER__::Context:0xb7a85914> (NoMethodError) — Tanaka Akira <akr@...>

[#33993] Re: [ruby-cvs:22935] Ruby:r15695 (trunk): * string.c (is_utf8_lead_byte, count_utf8_lead_bytes_with_ulong): — Tanaka Akira <akr@...>

[#33994] printf "[%08f]\n", 0.0/0.0 — Tanaka Akira <akr@...>

[#33996] No definition for sym_succ など — Martin Duerst <duerst@...>

[#34000] --program-suffix option of configure.bat(mswin32) — KIMURA Koichi <kimura.koichi@...>

[#34002] sprintf("% e", Inf) — Tanaka Akira <akr@...>

[#34005] Array#slice! may be too slow and allocate memory too much — "Yusuke ENDOH" <mame@...>

[#34006] 11.divmod(3.5) — Tanaka Akira <akr@...>

[#34008] local_variables contains strings — Tanaka Akira <akr@...>

[#34010] RUBY_*定数のencoding — Kazuhiro NISHIYAMA <zn@...>

[#34011] Should --verbose be equal to -v ? — Yugui <yugui@...>

[#34019] DelegateClassのバックトレース — "GO Noguchi" <gonoguti@...>

[#34020] MurmurHash problem — Nobuyoshi Nakada <nobu@...>

[#34021] singleton class extended by a module with module_function — Tanaka Akira <akr@...>

[#34022] patch of lazy sweep gc — authorNari <authornari@...>

[#34030] uint32_t — KIMURA Koichi <kimura.koichi@...>

[#34033] ruby coding guideline — "NARUSE, Yui" <naruse@...>

[#34037] Ruby performance gains on SPARC — Yukihiro Matsumoto <matz@...>

[#34042] IOはこれからもEnumerableなのか — rubikitch@...

[#34048] CGI::Cookie.new を高速化するバッチ — "Makoto Kuwata" <kwa@...>

[#34052] IO should provide #each_char and #chars — Yugui <yugui@...>

[#34059] putsでSEGV — rubikitch@...

[#34066] Bignum#div — Tadayoshi Funaba <tadf@...>

[#34067] Array#take,take_while,drop,drop_whlie — "Yusuke ENDOH" <mame@...>

[#34068] lgamma_r requires _REENTRANT on Solaris — "Yusuke ENDOH" <mame@...>

[#34071] procを組み合わせた再帰呼び出しで例外が投げられない — wanabe <s.wanabe@...>

[#34072] rb_memsearch optimization — "NARUSE, Yui" <naruse@...>

[#34077] 異なるエンコーディングだと同じバイト列でも==にならない件 — rubikitch@...

[#34086] extend spawn to change attributes of child process. — Tanaka Akira <akr@...>

[#34093] 拡張ライブラリ初期化中でのmodule_eval — Kouhei Sutou <kou@...>

[#34094] tempfile.unlink 後に tempfile.close — SATOH Fumiyasu <fumiyas@...>

[#34095] (再送) Cygwin で Resolv.getaddress が失敗する — Kouhei Yanagita <yanagi@...>

[#34100] delegate Kernel#{gets,readline,readlines} to ARGF — Nobuyoshi Nakada <nobu@...>

[#34105] rational.rb, complex.rb and mathn.rb — Tadayoshi Funaba <tadf@...>

[#34109] LP64: date.rb:321:in `convert': integer 86400000000000 too big to convert to `int' (RangeError) — Tanaka Akira <akr@...>

[#34112] Module#define_methodがprivateなわけ — rubikitch@...

[#34114] Is 'Class.new(Class)' valid? — rucila <rucila@...>

[#34116] Improving Fixnum#gcd(Fixnum) — "Akinori MUSHA" <knu@...>

[#34117] backporting features from 1.9 to 1.8 — "Akinori MUSHA" <knu@...>

[#34119] Process.daemon kills other threads — Tanaka Akira <akr@...>

[#34122] --disable=all --enable=gems — Kazuhiro NISHIYAMA <zn@...>

[#34128] enumerator with certain built-in methods dumps core — "Yusuke ENDOH" <mame@...>

[#34136] thread.c (fastthread)の状況 — Daigo Moriwaki <techml@...>

[#34144] [質問２点] C からの定数参照 & thread switching コストの低減 — Hidetoshi NAGAI <nagai@...>

[#34150] sigsetjmp significantly slower on OSX. — Yukihiro Matsumoto <matz@...>

[#34158] Complex組み込み — Masahiro TANAKA <masa16.tanaka@...>

[#34159] ruby-trunk Marshal.dump bug — nagachika <rucila@...>

[#34162] GC heap size less patch — authorNari <authornari@...>

[#34163] Array#shift/unshift の高速化 — wanabe <s.wanabe@...>

[#34180] メソッドにクラスを定義させるとprivateになる？ — rubikitch@...

[#34184] Re: [ruby-cvs:23097] Ruby:r15858 (trunk): * io.c: IO.copy_stream implemented. — "U.Nakamura" <usa@...>

[#34189] Re: [ruby-cvs:23106] Re: Ruby:r15866 (trunk): * numeric.c (num_quo): should convert its operand to Rational. — Tadayoshi Funaba <tadf@...>

[#34191] Object#singleton_class — rubikitch@...

[ruby-dev:34072] rb_memsearch optimization

Attachments (1)

Thread

In This Thread

[#33954] Mac OS X 10.5.2でruby-1.9.0-1のmake testが失敗 — 高尾宏治 <kouji@...>

[#34072] rb_memsearch　optimization — "NARUSE, Yui" <naruse@...>

[ruby-dev:34072] rb_memsearch　optimization