[ruby-dev:38449] Re: Enumerable#gather_each
From:
Tanaka Akira <akr@...>
Date:
2009-05-14 14:35:33 UTC
List:
ruby-dev #38449
In article <87d4acfn27.fsf@fsij.org>,
Tanaka Akira <akr@fsij.org> writes:
> Python のように、each のブロックの第一引数に渡してもいいのか
> もしれません。
>
> enum.gather(init_state) {|elt,state| ... }.each {|category,ary| ... }
こうすると、これが group_by と似ていることがよくわかります。
% ./ruby -e '(1..10).gather {|e| e & 4 }.each {|x| p x }'
[0, [1, 2, 3]]
[4, [4, 5, 6, 7]]
[0, [8, 9, 10]]
% ./ruby -e '(1..10).group_by {|e| e & 4 }.each {|x| p x }'
[0, [1, 2, 3, 8, 9, 10]]
[4, [4, 5, 6, 7]]
gather は 1, 2, 3 と 8, 9, 10 がわかれていますが、
group_by は最後までみるのでそれらがまとまっています。
まぁ、返り値が Enumerator と Hash と違ったりもするので、全体
として似ているかというとまた別ですが。
というように実装してみたものをつけておきます。
% svn diff --diff-cmd diff -x '-u -p'
Index: enum.c
===================================================================
--- enum.c (revision 23381)
+++ enum.c (working copy)
@@ -1793,6 +1793,247 @@ enum_cycle(int argc, VALUE *argv, VALUE
return Qnil; /* not reached */
}
+struct chunk_by_arg { /* state shared between chunk_by_i and its per-element callback chunk_by_ii */
+ VALUE categorize; /* proc invoked via "call" with (element, state); its result is the element's category */
+ VALUE state; /* second argument handed to categorize: a dup of the initial state, or nil */
+ VALUE prev_value; /* category of the chunk currently being accumulated; nil when no chunk is open */
+ VALUE prev_elts; /* array of elements accumulated for the open chunk; nil when no chunk is open */
+ VALUE yielder; /* object that receives each finished [category, elements] pair via "<<" */
+};
+/*
+ * Per-element callback for the "each" iteration started by chunk_by_i.
+ *
+ * i     - the element yielded by the source enumerable.
+ * _argp - a struct chunk_by_arg * passed through rb_block_call as a VALUE.
+ *
+ * Calls the categorize proc with (i, state) and acts on the result v:
+ * - :_singleton            : flush the open chunk (if any), then emit
+ *                            [v, [i]] as a chunk of its own.
+ * - nil, false, :_separator: flush the open chunk (if any); i is dropped.
+ * - other symbol whose name starts with '_': raise RuntimeError.
+ * - anything else          : append i to the open chunk when v is
+ *                            rb_equal to its category; otherwise flush the
+ *                            open chunk and start a new one holding i.
+ *
+ * "Flush" means sending [prev_value, prev_elts] to the yielder with "<<".
+ * Always returns Qnil.
+ */
+static VALUE
+chunk_by_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
+{
+ struct chunk_by_arg *argp = (struct chunk_by_arg *)_argp;
+ VALUE v;
+ VALUE singleton = ID2SYM(rb_intern("_singleton"));
+ VALUE separator = ID2SYM(rb_intern("_separator"));
+
+ ENUM_WANT_SVALUE(); /* macro defined outside this patch; presumably packs multiple yielded values into i -- TODO confirm */
+
+ v = rb_funcall(argp->categorize, rb_intern("call"), 2, i, argp->state);
+
+ if (v == singleton) {
+ if (!NIL_P(argp->prev_value)) { /* a chunk is open: emit it before the singleton */
+ rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
+ argp->prev_value = argp->prev_elts = Qnil;
+ }
+ rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(v, rb_ary_new3(1, i))); /* the singleton never joins a neighbouring chunk */
+ }
+ else if (!RTEST(v) || v == separator) { /* separator: close the open chunk and discard i */
+ if (!NIL_P(argp->prev_value)) {
+ rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
+ argp->prev_value = argp->prev_elts = Qnil;
+ }
+ }
+ else if (SYMBOL_P(v) && rb_id2name(SYM2ID(v))[0] == '_') { /* the two known underscore symbols were handled above; all others are rejected */
+ rb_raise(rb_eRuntimeError, "symbol begins with an underscore is reserved");
+ }
+ else {
+ if (NIL_P(argp->prev_value)) { /* v is truthy here, so a nil prev_value can only mean "no open chunk" */
+ argp->prev_value = v;
+ argp->prev_elts = rb_ary_new3(1, i);
+ }
+ else {
+ if (rb_equal(argp->prev_value, v)) { /* same category as the open chunk: extend it */
+ rb_ary_push(argp->prev_elts, i);
+ }
+ else { /* category changed: emit the finished chunk and open a new one */
+ rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(argp->prev_value, argp->prev_elts));
+ argp->prev_value = v;
+ argp->prev_elts = rb_ary_new3(1, i);
+ }
+ }
+ }
+ return Qnil;
+}
+/*
+ * Block handed to the enumerator's "initialize" by enum_chunk_by.
+ *
+ * yielder    - object that receives the finished chunks via "<<".
+ * enumerator - the enumerator created by enum_chunk_by, passed as the
+ *              callback data; the source enumerable, the categorize proc
+ *              and the initial state are read from its instance variables.
+ *
+ * Iterates the source enumerable with chunk_by_ii and afterwards emits the
+ * chunk that is still open, if any. Always returns Qnil.
+ */
+static VALUE
+chunk_by_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
+{
+ VALUE enumerable;
+ struct chunk_by_arg arg;
+
+ enumerable = rb_ivar_get(enumerator, rb_intern("chunk_by_enumerable"));
+ arg.categorize = rb_ivar_get(enumerator, rb_intern("chunk_by_categorize"));
+ arg.state = rb_ivar_get(enumerator, rb_intern("chunk_by_initial_state"));
+ arg.prev_value = Qnil;
+ arg.prev_elts = Qnil;
+ arg.yielder = yielder;
+
+ if (!NIL_P(arg.state))
+ arg.state = rb_obj_dup(arg.state); /* work on a copy so the stored initial state is not mutated by the block */
+
+ rb_block_call(enumerable, id_each, 0, 0, chunk_by_ii, (VALUE)&arg);
+ if (!NIL_P(arg.prev_elts)) /* the last chunk has no following element to trigger its flush */
+ rb_funcall(arg.yielder, rb_intern("<<"), 1, rb_assoc_new(arg.prev_value, arg.prev_elts));
+ return Qnil;
+}
+
+/*
+ *  call-seq:
+ *     enum.chunk_by(initial_state=nil) {|elt, state| ... } => enumerator
+ *
+ *  Creates an enumerator for iterating over chunked elements of _enum_.
+ *
+ *  This method gathers consecutive elements for which the block returns
+ *  the same value.  Each chunk is yielded as a two-element array
+ *  <code>[value, elements]</code>.
+ *
+ *  The following block return values have special meaning:
+ *  - nil, false and :_separator specify that the element is dropped;
+ *    the chunk gathered so far (if any) is yielded.
+ *  - :_singleton specifies that the element forms a chunk by itself.
+ *
+ *  Other symbols which begin with an underscore are reserved for future
+ *  use; returning one raises RuntimeError.
+ *
+ *  If a non-nil value is given for _initial_state_,
+ *  it is duplicated for each "each" method invocation of the enumerator.
+ *  The duplicated object is passed as the second argument of the block
+ *  given to the "chunk_by" method.
+ *
+ *     (1..10).chunk_by {|n| n & 2 }.each {|a| p a }
+ *     #=> [0, [1]]        # 1 & 2 = 0
+ *     #   [2, [2, 3]]     # 2 & 2 = 3 & 2 = 2
+ *     #   [0, [4, 5]]     # 4 & 2 = 5 & 2 = 0
+ *     #   [2, [6, 7]]     # 6 & 2 = 7 & 2 = 2
+ *     #   [0, [8, 9]]     # 8 & 2 = 9 & 2 = 0
+ *     #   [2, [10]]       # 10 & 2 = 2
+ *
+ *     # gather indented blocks.
+ *     io.chunk_by {|line| /\A\s/ =~ line }.each {|lines| pp lines }
+ *
+ *     # iterate over svn log entries.
+ *     IO.popen([{"LANG"=>"C"}, *%w[svn log enum.c]]) {|f|
+ *       sep = "-"*72+"\n"
+ *       f.chunk_by {|line| line != sep }.each {|e| pp e }
+ *     }
+ */
+static VALUE
+enum_chunk_by(int argc, VALUE *argv, VALUE enumerable)
+{
+ VALUE initial_state;
+ VALUE enumerator;
+
+ rb_scan_args(argc, argv, "01", &initial_state); /* "01": no required and one optional argument */
+
+ enumerator = rb_obj_alloc(rb_cEnumerator);
+ rb_ivar_set(enumerator, rb_intern("chunk_by_enumerable"), enumerable); /* the three ivars below are read back by chunk_by_i */
+ rb_ivar_set(enumerator, rb_intern("chunk_by_categorize"), rb_block_proc());
+ rb_ivar_set(enumerator, rb_intern("chunk_by_initial_state"), initial_state);
+ rb_block_call(enumerator, rb_intern("initialize"), 0, 0, chunk_by_i, enumerator); /* chunk_by_i is the block of "initialize"; the enumerator itself is its callback data */
+ return enumerator;
+}
+
+struct slice_before_arg { /* state shared between slice_before_i and its per-element callback slice_before_ii */
+ VALUE separator_p; /* proc invoked via "call" with (element, state); a true result starts a new slice */
+ VALUE state; /* second argument handed to separator_p: a dup of the initial state, or nil */
+ VALUE prev_elts; /* array of elements in the slice being accumulated; nil before the first element */
+ VALUE yielder; /* object that receives each finished slice via "<<" */
+};
+/*
+ * Per-element callback for the "each" iteration started by slice_before_i.
+ *
+ * i     - the element yielded by the source enumerable.
+ * _argp - a struct slice_before_arg * passed through rb_block_call as a VALUE.
+ *
+ * Calls the separator_p proc with (i, state).  On a true result the slice
+ * accumulated so far (if any) is sent to the yielder with "<<" and a new
+ * slice starting with i is opened; otherwise i is appended to the current
+ * slice, which is created on demand.  Always returns Qnil.
+ */
+static VALUE
+slice_before_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
+{
+ struct slice_before_arg *argp = (struct slice_before_arg *)_argp;
+ VALUE bool;
+
+ ENUM_WANT_SVALUE(); /* macro defined outside this patch; presumably packs multiple yielded values into i -- TODO confirm */
+
+ bool = rb_funcall(argp->separator_p, rb_intern("call"), 2, i, argp->state);
+ if (RTEST(bool)) {
+ if (!NIL_P(argp->prev_elts)) /* nothing to emit when the very first element starts a slice */
+ rb_funcall(argp->yielder, rb_intern("<<"), 1, argp->prev_elts);
+ argp->prev_elts = rb_ary_new3(1, i); /* i becomes the head of the new slice */
+ }
+ else {
+ if (NIL_P(argp->prev_elts)) /* leading elements before any separator still form a slice */
+ argp->prev_elts = rb_ary_new3(1, i);
+ else
+ rb_ary_push(argp->prev_elts, i);
+ }
+
+ return Qnil;
+}
+/*
+ * Block handed to the enumerator's "initialize" by enum_slice_before.
+ *
+ * yielder    - object that receives the finished slices via "<<".
+ * enumerator - the enumerator created by enum_slice_before, passed as the
+ *              callback data; the source enumerable, the separator proc
+ *              and the initial state are read from its instance variables.
+ *
+ * Iterates the source enumerable with slice_before_ii and afterwards emits
+ * the slice that is still open, if any. Always returns Qnil.
+ */
+static VALUE
+slice_before_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
+{
+ VALUE enumerable;
+ struct slice_before_arg arg;
+
+ enumerable = rb_ivar_get(enumerator, rb_intern("slice_before_enumerable"));
+ arg.separator_p = rb_ivar_get(enumerator, rb_intern("slice_before_separator_p"));
+ arg.state = rb_ivar_get(enumerator, rb_intern("slice_before_initial_state"));
+ arg.prev_elts = Qnil;
+ arg.yielder = yielder;
+
+ if (!NIL_P(arg.state))
+ arg.state = rb_obj_dup(arg.state); /* work on a copy so the stored initial state is not mutated by the block */
+
+ rb_block_call(enumerable, id_each, 0, 0, slice_before_ii, (VALUE)&arg);
+ if (!NIL_P(arg.prev_elts)) /* the last slice has no following separator to trigger its flush */
+ rb_funcall(arg.yielder, rb_intern("<<"), 1, arg.prev_elts);
+ return Qnil;
+}
+
+/*
+ *  call-seq:
+ *     enum.slice_before(initial_state=nil) {|elt, state| ... } => enumerator
+ *
+ *  Creates an enumerator for iterating over sliced elements of _enum_.
+ *
+ *  This method splits _enum_ into arrays of consecutive elements.
+ *  A new slice begins at each element for which the block returns a
+ *  true value; elements before the first such element form a slice of
+ *  their own.
+ *
+ *  If a non-nil value is given for _initial_state_,
+ *  it is duplicated for each "each" method invocation of the enumerator.
+ *  The duplicated object is passed as the second argument of the block
+ *  given to the "slice_before" method.
+ *
+ *     # iterate over ChangeLog entries.
+ *     open("ChangeLog") {|f|
+ *       f.slice_before {|line| /\A\S/ =~ line }.each {|e| pp e}
+ *     }
+ *
+ *     # parse mbox
+ *     open("mbox") {|f|
+ *       f.slice_before {|line|
+ *         line.start_with? "From "
+ *       }.each {|mail|
+ *         unix_from = mail.shift
+ *         i = mail.index("\n")
+ *         header = mail[0...i]
+ *         body = mail[(i+1)..-1]
+ *         fields = header.slice_before {|line| !" \t".include?(line[0]) }.to_a
+ *         p unix_from
+ *         pp fields
+ *         pp body
+ *       }
+ *     }
+ *
+ *     # split mails in mbox (slice before Unix From line after an empty line)
+ *     open("mbox") {|f|
+ *       f.slice_before(emp: true) {|line,h|
+ *         prevemp = h[:emp]
+ *         h[:emp] = line == "\n"
+ *         prevemp && line.start_with?("From ")
+ *       }.each {|mail|
+ *         pp mail
+ *       }
+ *     }
+ *
+ */
+static VALUE
+enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
+{
+ VALUE initial_state, enumerator;
+
+ rb_scan_args(argc, argv, "01", &initial_state); /* "01": no required and one optional argument */
+
+ enumerator = rb_obj_alloc(rb_cEnumerator);
+ rb_ivar_set(enumerator, rb_intern("slice_before_enumerable"), enumerable); /* the three ivars below are read back by slice_before_i */
+ rb_ivar_set(enumerator, rb_intern("slice_before_separator_p"), rb_block_proc());
+ rb_ivar_set(enumerator, rb_intern("slice_before_initial_state"), initial_state);
+ rb_block_call(enumerator, rb_intern("initialize"), 0, 0, slice_before_i, enumerator); /* slice_before_i is the block of "initialize"; the enumerator itself is its callback data */
+ return enumerator;
+}
+
/*
* The <code>Enumerable</code> mixin provides collection classes with
* several traversal and searching methods, and with the ability to
@@ -1852,6 +2093,9 @@ Init_Enumerable(void)
rb_define_method(rb_mEnumerable, "drop", enum_drop, 1);
rb_define_method(rb_mEnumerable, "drop_while", enum_drop_while, 0);
rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
+ rb_define_method(rb_mEnumerable, "gather", enum_chunk_by, -1);
+ rb_define_method(rb_mEnumerable, "chunk_by", enum_chunk_by, -1);
+ rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
id_eqq = rb_intern("===");
id_each = rb_intern("each");
--
[田中 哲][たなか あきら][Tanaka Akira]