[ruby-dev:42659] Re: Enumerable#categorize
From:
Tanaka Akira <akr@...>
Date:
2010-11-29 13:34:55 UTC
List:
ruby-dev #42659
2010年11月29日13:56 Yukihiro Matsumoto <matz@ruby-lang.org>:
> 挙動としては大変好みです。ただし、 categorize はともかく、
> hashmap はいまいち名前に自信が持てないので、更なる調査(既存
> の言語に類似機能はないか)が必要かもしれません。また、他の方
> のご意見も歓迎します。ruby-coreにも聞いてみるべきかな。
とりあえず categorize だけ C で実装してみました。
(:update の仕様は配列を削減するため微妙に変えてあります。)
Index: enum.c
===================================================================
--- enum.c (revision 29971)
+++ enum.c (working copy)
@@ -15,7 +15,7 @@
#include "id.h"
VALUE rb_mEnumerable;
-static ID id_next;
+static ID id_next, id_call, id_seed, id_op, id_update;
#define id_each idEach
#define id_eqq idEqq
#define id_cmp idCmp
@@ -2595,6 +2595,160 @@ enum_slice_before(int argc, VALUE *argv,
return enumerator;
}
+struct categorize_arg {
+ VALUE seed;
+ VALUE op;
+ VALUE update;
+ VALUE result;
+};
+
+static VALUE
+categorize_update(struct categorize_arg *argp, VALUE ary, VALUE acc, VALUE val)
+{
+ if (argp->op != Qundef) {
+ if (SYMBOL_P(argp->op))
+ return rb_funcall(acc, SYM2ID(argp->op), 1, val);
+ else
+ return rb_funcall(argp->op, id_call, 2, acc, val);
+ }
+ else if (argp->update != Qundef) {
+ if (SYMBOL_P(argp->update))
+ return rb_funcall(acc, SYM2ID(argp->update), 1, ary);
+ else
+ return rb_funcall(argp->update, id_call, 2, acc, ary);
+ }
+ else {
+ if (NIL_P(acc))
+ return rb_ary_new3(1, val);
+ else {
+ Check_Type(acc, T_ARRAY);
+ rb_ary_push(acc, val);
+ return acc;
+ }
+ }
+}
+
+static VALUE
+categorize_i(VALUE i, VALUE _arg, int argc, VALUE *argv)
+{
+ struct categorize_arg *argp;
+ VALUE ary, h;
+ VALUE lastk, val, acc;
+ long j;
+
+ ENUM_WANT_SVALUE();
+
+ argp = (struct categorize_arg *)_arg;
+
+ ary = rb_yield(i);
+ ary = rb_convert_type(ary, T_ARRAY, "Array", "to_ary");
+ if (RARRAY_LEN(ary) < 2) {
+ rb_raise(rb_eArgError, "array too short");
+ }
+ lastk = RARRAY_PTR(ary)[RARRAY_LEN(ary)-2];
+ val = RARRAY_PTR(ary)[RARRAY_LEN(ary)-1];
+ h = argp->result;
+ for (j = 0; j < RARRAY_LEN(ary) - 2; j++) {
+ VALUE k = RARRAY_PTR(ary)[j];
+ VALUE h2;
+ h2 = rb_hash_lookup2(h, k, Qundef);
+ if (h2 == Qundef) {
+ h2 = rb_hash_new();
+ rb_hash_aset(h, k, h2);
+ }
+ h = h2;
+ }
+ acc = rb_hash_lookup2(h, lastk, Qundef);
+ if (acc == Qundef) {
+ if (argp->seed == Qundef)
+ acc = val;
+ else
+ acc = categorize_update(argp, ary, argp->seed, val);
+ }
+ else {
+ acc = categorize_update(argp, ary, acc, val);
+ }
+ rb_hash_aset(h, lastk, acc);
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * enum.categorize([opts]) {|elt| [key1, ..., val] } -> hash
+ *
+ * categorizes the elements in _enum_ and returns a hash.
+ *
+ * The block is called for each elements in _enum_.
+ * The block should return an array which contains
+ * one or more keys and one value.
+ *
+ * The keys and value are used to construct the result hash.
+ * If two or more keys are provided
+ * (i.e. the length of the array is longer than 2),
+ * the result hash will be nested.
+ *
+ * The value of innermost hash is an array which contains values for
+ * corresponding keys.
+ * (This behavior can be customized by :seed, :op and :update option.)
+ *
+ * a = [{:fruit => "banana", :color => "yellow", :taste => "sweet"},
+ * {:fruit => "melon", :color => "green", :taste => "sweet"},
+ * {:fruit => "grapefruit", :color => "yellow", :taste => "tart"}]
+ * p a.categorize {|h| h.values_at(:color, :fruit) }
+ * #=> {"yellow"=>["banana", "grapefruit"], "green"=>["melon"]}
+ *
+ * pp a.categorize {|h| h.values_at(:taste, :color, :fruit) }
+ * #=> {"sweet"=>{"yellow"=>["banana"], "green"=>["melon"]},
+ * # "tart"=>{"yellow"=>["grapefruit"]}}
+ *
+ * This method can take an option hash.
+ * Available options are follows:
+ *
+ * - :seed specifies seed value.
+ * - :op specifies a procedure from seed and value to next seed.
+ * - :update specifies a procedure from seed and block value to next seed.
+ *
+ * The default behavior, array construction, can be implemented as follows.
+ * :seed => nil
+ * :op => lambda {|s, v| !s ? [v] : (s << v) }
+ *
+ */
+static VALUE
+enum_categorize(int argc, VALUE *argv, VALUE enumerable)
+{
+ VALUE opts;
+ struct categorize_arg arg;
+
+ RETURN_ENUMERATOR(enumerable, 0, 0);
+
+ rb_scan_args(argc, argv, "01", &opts);
+
+ if (NIL_P(opts)) {
+ arg.seed = Qnil;
+ arg.op = Qundef;
+ arg.update = Qundef;
+ }
+ else {
+ opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+ arg.seed = rb_hash_lookup2(opts, ID2SYM(id_seed), Qundef);
+ arg.op = rb_hash_lookup2(opts, ID2SYM(id_op), Qundef);
+ arg.update = rb_hash_lookup2(opts, ID2SYM(id_update), Qundef);
+ if (arg.op != Qundef && arg.update != Qundef) {
+ rb_raise(rb_eArgError, "both :update and :op specified");
+ }
+ if (arg.op != Qundef && !SYMBOL_P(arg.op))
+ arg.op = rb_convert_type(arg.op, T_DATA, "Proc", "to_proc");
+ if (arg.update != Qundef && !SYMBOL_P(arg.update))
+ arg.update = rb_convert_type(arg.update, T_DATA, "Proc",
"to_proc");
+ }
+
+ arg.result = rb_hash_new();
+
+ rb_block_call(enumerable, id_each, 0, 0, categorize_i, (VALUE)&arg);
+
+ return arg.result;
+}
+
/*
* The <code>Enumerable</code> mixin provides collection classes with
* several traversal and searching methods, and with the ability to
@@ -2662,6 +2816,11 @@ Init_Enumerable(void)
rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
+ rb_define_method(rb_mEnumerable, "categorize", enum_categorize, -1);
id_next = rb_intern("next");
+ id_call = rb_intern("call");
+ id_seed = rb_intern("seed");
+ id_op = rb_intern("op");
+ id_update = rb_intern("update");
}
Index: test/ruby/test_enum.rb
===================================================================
--- test/ruby/test_enum.rb (revision 29971)
+++ test/ruby/test_enum.rb (working copy)
@@ -384,4 +384,33 @@ class TestEnumerable < Test::Unit::TestC
ss.slice_before(/\A...\z/).to_a)
end
+ def test_categorize
+ assert_equal((1..6).group_by {|i| i % 3 },
+ (1..6).categorize {|e| [e % 3, e] })
+ assert_equal(Hash[ [ ["a", 100], ["b", 200] ] ],
+ [ ["a", 100], ["b", 200] ].categorize(:op=>lambda
{|x,y| y }) {|e| e })
+ h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 }
+ assert_equal(h.invert,
+ h.categorize(:op=>lambda {|x,y| y }) {|k, v| [v, k] })
+ assert_equal({"f"=>1, "o"=>2, "b"=>2, "a"=>2, "r"=>1, "z"=>1},
+ "foobarbaz".split(//).categorize(:op=>:+) {|ch| [ch, 1] })
+ assert_equal({"f"=>1, "o"=>2, "b"=>2, "a"=>2, "r"=>1, "z"=>1},
+ "foobarbaz".split(//).categorize(:update=>lambda
{|s, a| s + a.last }) {|ch| [ch, 1] })
+ assert_equal({"f"=>["f", 1],
+ "o"=>["o", 1, "o", 1],
+ "b"=>["b", 1, "b", 1],
+ "a"=>["a", 1, "a", 1],
+ "r"=>["r", 1],
+ "z"=>["z", 1]},
+ "foobarbaz".split(//).categorize(:seed=>[],
:update=>:+) {|ch| [ch, 1] })
+ assert_raise(ArgumentError) { [0].categorize {|e| [] } }
+ assert_raise(ArgumentError) { [0].categorize {|e| [1] } }
+ assert_equal(
+ {"f"=>{"o"=>{"o"=>{:c=>1}}},
+ "b"=>{"a"=>{"r"=>{:c=>1},
+ "z"=>{:c=>1}}}},
+ %w[foo bar baz].categorize(:op=>:+) {|s| s.split(//) + [:c, 1] })
+
+ end
+
end
--
[田中 哲][たなか あきら][Tanaka Akira]