[#42564] [Ruby 1.9-Feature#4043][Open] グローバル関数current_classの提案 — Makoto Kishimoto <redmine@...>

Feature #4043: =E3=82=B0=E3=83=AD=E3=83=BC=E3=83=90=E3=83=AB=E9=96=A2=E6=95=

15 messages 2010/11/11
[#42774] Re: [Ruby 1.9-Feature#4043][Open] グローバル関数current_classの提案 — Yukihiro Matsumoto <matz@...> 2010/12/16

まつもと ゆきひろです

[#42834] Re: [Ruby 1.9-Feature#4043][Open] グローバル関数current_classの提案 — "KISHIMOTO, Makoto" <ksmakoto@...4u.or.jp> 2010/12/21

きしもとです

[#42835] Re: [Ruby 1.9-Feature#4043][Open] グローバル関数current_classの提案 — Yukihiro Matsumoto <matz@...> 2010/12/21

まつもと ゆきひろです

[#42838] Re: [Ruby 1.9-Feature#4043][Open] グローバル関数current_classの提案 — "KISHIMOTO, Makoto" <ksmakoto@...4u.or.jp> 2010/12/21

きしもとです

[#42845] Re: [Ruby 1.9-Feature#4043][Open] グローバル関数current_classの提案 — Yukihiro Matsumoto <matz@...> 2010/12/21

まつもと ゆきひろです

[#42577] Rubyのバグレポートのガイドライン — "Shota Fukumori (sora_h)" <sorah@...>

sora_hです。

11 messages 2010/11/15
[#42588] Re: Rubyのバグレポートのガイドライン — Yugui <yugui@...> 2010/11/18

2010/11/15 Shota Fukumori (sora_h) <sorah@tubusu.net>:

[#42638] Enumerable#categorize — Tanaka Akira <akr@...>

enumerable から hash を生成するメソッドとして

25 messages 2010/11/27
[#42643] Re: Enumerable#categorize — Yukihiro Matsumoto <matz@...> 2010/11/27

まつもと ゆきひろです

[ruby-dev:42659] Re: Enumerable#categorize

From: Tanaka Akira <akr@...>
Date: 2010-11-29 13:34:55 UTC
List: ruby-dev #42659
2010年11月29日13:56 Yukihiro Matsumoto <matz@ruby-lang.org>:

> 挙動としては大変好みです。ただし、 categorize はともかく、
> hashmap はいまいち名前に自信が持てないので、更なる調査(既存
> の言語に類似機能はないか)が必要かもしれません。また、他の方
> のご意見も歓迎します。ruby-coreにも聞いてみるべきかな。

とりあえず categorize だけ C で実装してみました。

(:update の仕様は配列を削減するため微妙に変えてあります。)

Index: enum.c
===================================================================
--- enum.c	(revision 29971)
+++ enum.c	(working copy)
@@ -15,7 +15,7 @@
 #include "id.h"

 VALUE rb_mEnumerable;
-static ID id_next;
+static ID id_next, id_call, id_seed, id_op, id_update;
 #define id_each idEach
 #define id_eqq  idEqq
 #define id_cmp  idCmp
@@ -2595,6 +2595,160 @@ enum_slice_before(int argc, VALUE *argv,
     return enumerator;
 }

+struct categorize_arg {
+    VALUE seed;
+    VALUE op;
+    VALUE update;
+    VALUE result;
+};
+
+static VALUE
+categorize_update(struct categorize_arg *argp, VALUE ary, VALUE acc, VALUE val)
+{
+    if (argp->op != Qundef) {
+        if (SYMBOL_P(argp->op))
+            return rb_funcall(acc, SYM2ID(argp->op), 1, val);
+        else
+            return rb_funcall(argp->op, id_call, 2, acc, val);
+    }
+    else if (argp->update != Qundef) {
+        if (SYMBOL_P(argp->update))
+            return rb_funcall(acc, SYM2ID(argp->update), 1, ary);
+        else
+            return rb_funcall(argp->update, id_call, 2, acc, ary);
+    }
+    else {
+        if (NIL_P(acc))
+            return rb_ary_new3(1, val);
+        else {
+            Check_Type(acc, T_ARRAY);
+            rb_ary_push(acc, val);
+            return acc;
+        }
+    }
+}
+
+static VALUE
+categorize_i(VALUE i, VALUE _arg, int argc, VALUE *argv)
+{
+    struct categorize_arg *argp;
+    VALUE ary, h;
+    VALUE lastk, val, acc;
+    long j;
+
+    ENUM_WANT_SVALUE();
+
+    argp = (struct categorize_arg *)_arg;
+
+    ary = rb_yield(i);
+    ary = rb_convert_type(ary, T_ARRAY, "Array", "to_ary");
+    if (RARRAY_LEN(ary) < 2) {
+        rb_raise(rb_eArgError, "array too short");
+    }
+    lastk = RARRAY_PTR(ary)[RARRAY_LEN(ary)-2];
+    val = RARRAY_PTR(ary)[RARRAY_LEN(ary)-1];
+    h = argp->result;
+    for (j = 0; j < RARRAY_LEN(ary) - 2; j++) {
+        VALUE k = RARRAY_PTR(ary)[j];
+        VALUE h2;
+        h2 = rb_hash_lookup2(h, k, Qundef);
+        if (h2 == Qundef) {
+            h2 = rb_hash_new();
+            rb_hash_aset(h, k, h2);
+        }
+        h = h2;
+    }
+    acc = rb_hash_lookup2(h, lastk, Qundef);
+    if (acc == Qundef) {
+        if (argp->seed == Qundef)
+            acc = val;
+        else
+            acc = categorize_update(argp, ary, argp->seed, val);
+    }
+    else {
+        acc = categorize_update(argp, ary, acc, val);
+    }
+    rb_hash_aset(h, lastk, acc);
+    return Qnil;
+}
+
+/*
+ * call-seq:
+ *   enum.categorize([opts]) {|elt| [key1, ..., val] } -> hash
+ *
+ * categorizes the elements in _enum_ and returns a hash.
+ *
+ * The block is called for each elements in _enum_.
+ * The block should return an array which contains
+ * one or more keys and one value.
+ *
+ * The keys and value are used to construct the result hash.
+ * If two or more keys are provided
+ * (i.e. the length of the array is longer than 2),
+ * the result hash will be nested.
+ *
+ * The value of innermost hash is an array which contains values for
+ * corresponding keys.
+ * (This behavior can be customized by :seed, :op and :update option.)
+ *
+ *   a = [{:fruit => "banana", :color => "yellow", :taste => "sweet"},
+ *        {:fruit => "melon", :color => "green", :taste => "sweet"},
+ *        {:fruit => "grapefruit", :color => "yellow", :taste => "tart"}]
+ *   p a.categorize {|h| h.values_at(:color, :fruit) }
+ *   #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
+ *
+ *   pp a.categorize {|h| h.values_at(:taste, :color, :fruit) }
+ *   #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]},
+ *   #    "tart"=>{"yellow"=>["grapefruit"]}}
+ *
+ * This method can take an option hash.
+ * Available options are follows:
+ *
+ * - :seed specifies seed value.
+ * - :op specifies a procedure from seed and value to next seed.
+ * - :update specifies a procedure from seed and block value to next seed.
+ *
+ * The default behavior, array construction, can be implemented as follows.
+ *   :seed => nil
+ *   :op => lambda {|s, v| !s ? [v] : (s << v) }
+ *
+ */
+static VALUE
+enum_categorize(int argc, VALUE *argv, VALUE enumerable)
+{
+    VALUE opts;
+    struct categorize_arg arg;
+
+    RETURN_ENUMERATOR(enumerable, 0, 0);
+
+    rb_scan_args(argc, argv, "01", &opts);
+
+    if (NIL_P(opts)) {
+        arg.seed = Qnil;
+        arg.op = Qundef;
+        arg.update = Qundef;
+    }
+    else {
+        opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+        arg.seed = rb_hash_lookup2(opts, ID2SYM(id_seed), Qundef);
+        arg.op = rb_hash_lookup2(opts, ID2SYM(id_op), Qundef);
+        arg.update = rb_hash_lookup2(opts, ID2SYM(id_update), Qundef);
+        if (arg.op != Qundef && arg.update != Qundef) {
+            rb_raise(rb_eArgError, "both :update and :op specified");
+        }
+        if (arg.op != Qundef && !SYMBOL_P(arg.op))
+            arg.op = rb_convert_type(arg.op, T_DATA, "Proc", "to_proc");
+        if (arg.update != Qundef && !SYMBOL_P(arg.update))
+            arg.update = rb_convert_type(arg.update, T_DATA, "Proc",
"to_proc");
+    }
+
+    arg.result = rb_hash_new();
+
+    rb_block_call(enumerable, id_each, 0, 0, categorize_i, (VALUE)&arg);
+
+    return arg.result;
+}
+
 /*
  *  The <code>Enumerable</code> mixin provides collection classes with
  *  several traversal and searching methods, and with the ability to
@@ -2662,6 +2816,11 @@ Init_Enumerable(void)
     rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
     rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
     rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
+    rb_define_method(rb_mEnumerable, "categorize", enum_categorize, -1);

     id_next = rb_intern("next");
+    id_call = rb_intern("call");
+    id_seed = rb_intern("seed");
+    id_op = rb_intern("op");
+    id_update = rb_intern("update");
 }
Index: test/ruby/test_enum.rb
===================================================================
--- test/ruby/test_enum.rb	(revision 29971)
+++ test/ruby/test_enum.rb	(working copy)
@@ -384,4 +384,33 @@ class TestEnumerable < Test::Unit::TestC
                  ss.slice_before(/\A...\z/).to_a)
   end

+  def test_categorize
+    assert_equal((1..6).group_by {|i| i % 3 },
+                 (1..6).categorize {|e| [e % 3, e] })
+    assert_equal(Hash[ [ ["a", 100], ["b", 200] ] ],
+                 [ ["a", 100], ["b", 200] ].categorize(:op=>lambda
{|x,y| y }) {|e| e })
+    h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 }
+    assert_equal(h.invert,
+                 h.categorize(:op=>lambda {|x,y| y }) {|k, v| [v, k] })
+    assert_equal({"f"=>1, "o"=>2, "b"=>2, "a"=>2, "r"=>1, "z"=>1},
+                 "foobarbaz".split(//).categorize(:op=>:+) {|ch| [ch, 1] })
+    assert_equal({"f"=>1, "o"=>2, "b"=>2, "a"=>2, "r"=>1, "z"=>1},
+                 "foobarbaz".split(//).categorize(:update=>lambda
{|s, a| s + a.last }) {|ch| [ch, 1] })
+    assert_equal({"f"=>["f", 1],
+                  "o"=>["o", 1, "o", 1],
+                  "b"=>["b", 1, "b", 1],
+                  "a"=>["a", 1, "a", 1],
+                  "r"=>["r", 1],
+                  "z"=>["z", 1]},
+                 "foobarbaz".split(//).categorize(:seed=>[],
:update=>:+) {|ch| [ch, 1] })
+    assert_raise(ArgumentError) { [0].categorize {|e| [] } }
+    assert_raise(ArgumentError) { [0].categorize {|e| [1] } }
+    assert_equal(
+      {"f"=>{"o"=>{"o"=>{:c=>1}}},
+       "b"=>{"a"=>{"r"=>{:c=>1},
+                   "z"=>{:c=>1}}}},
+      %w[foo bar baz].categorize(:op=>:+) {|s| s.split(//) + [:c, 1] })
+
+  end
+
 end
-- 
[田中 哲][たなか あきら][Tanaka Akira]

In This Thread