[#45637] [ruby-trunk - Feature #6440][Open] 引数にIOを渡した場合のMarshal.loadにバッファを持たせたい — "Glass_saga (Masaki Matsushita)" <glass.saga@...>

14 messages 2012/05/16

[#45670] [ruby-trunk - Bug #6479][Open] ipaddr.rbの受け付ける書式が、プラットフォームによって異なる — "kachick (Kenichi Kamiya)" <kachick1+ruby@...>

9 messages 2012/05/22

[ruby-dev:45645] Re: [ruby-trunk - Feature #6440][Open] 引数にIOを渡した場合のMarshal.loadにバッファを持たせたい

From: Nobuyoshi Nakada <nobu@...>
Date: 2012-05-17 14:09:27 UTC
List: ruby-dev #45645
なかだです。

(12/05/17 18:21), Tanaka Akira wrote:
> バッファがあると、読みすぎることがあるのではないでしょうか。
> 
> 読みすぎたぶんを捨ててしまうと、marshal したデータが複数並んでいるものを読み込む場合に、動作しなくなってしまうと思います。

ungetcするのはどうでしょうか。プロセスをまたいでしまうとやはりダメですが、これは現状でもできないようですし。


diff --git i/marshal.c w/marshal.c
index e05b9f5..20b22ea 100644
--- i/marshal.c
+++ w/marshal.c
@@ -81,7 +81,7 @@ shortlen(long len, BDIGIT *ds)
 
 static ID s_dump, s_load, s_mdump, s_mload;
 static ID s_dump_data, s_load_data, s_alloc, s_call;
-static ID s_getbyte, s_read, s_write, s_binmode;
+static ID s_ungetc, s_read, s_readpartial, s_write, s_binmode;
 
 typedef struct {
     VALUE newclass;
@@ -958,7 +958,10 @@ marshal_dump(int argc, VALUE *argv)
 
 struct load_arg {
     VALUE src;
+    char *buf;
+    long buflen;
     long offset;
+    int partial;
     st_table *symbols;
     st_table *data;
     VALUE proc;
@@ -1011,6 +1014,13 @@ static VALUE r_object(struct load_arg *arg);
 static ID r_symbol(struct load_arg *arg);
 static VALUE path2class(VALUE path);
 
+NORETURN(static void too_short(void));
+static void
+too_short(void)
+{
+    rb_raise(rb_eArgError, "marshal data too short");
+}
+
 static st_index_t
 r_prepare(struct load_arg *arg)
 {
@@ -1030,15 +1040,28 @@ r_byte(struct load_arg *arg)
 	    c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
 	}
 	else {
-	    rb_raise(rb_eArgError, "marshal data too short");
+	    too_short();
 	}
     }
     else {
-	VALUE src = arg->src;
-	VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
-	check_load_arg(arg, s_getbyte);
-	if (NIL_P(v)) rb_eof_error();
-	c = (unsigned char)NUM2CHR(v);
+	if (arg->buflen == 0) {
+	    VALUE str, n = LONG2NUM(BUFSIZ);
+
+	    if (arg->partial)
+		str = rb_funcall2(arg->src, s_readpartial, 1, &n);
+	    else
+		str = rb_funcall2(arg->src, s_read, 1, &n);
+
+	    check_load_arg(arg, s_read);
+	    if (NIL_P(str)) too_short();
+	    StringValue(str);
+	    arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+	    memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
+	    arg->offset = 0;
+	    arg->buflen = RSTRING_LEN(str);
+	}
+	c = (unsigned char)arg->buf[arg->offset++];
+	arg->buflen--;
     }
     return c;
 }
@@ -1091,6 +1114,63 @@ r_long(struct load_arg *arg)
     return x;
 }
 
+static VALUE
+r_bytes1(long len, struct load_arg *arg)
+{
+    VALUE str, n = LONG2NUM(len);
+
+    str = rb_funcall2(arg->src, s_read, 1, &n);
+    check_load_arg(arg, s_read);
+
+    if (NIL_P(str)) {
+	too_short();
+    }
+    StringValue(str);
+    if (RSTRING_LEN(str) < len) too_short();
+
+    arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+
+    return str;
+}
+
+static VALUE
+r_bytes1_partial(long len, struct load_arg *arg)
+{
+    long buflen = arg->buflen;
+    long tmp_len, need = len - buflen;
+    const char *tmp_ptr;
+    VALUE str, tmp, n = LONG2NUM(need > BUFSIZ ? need : BUFSIZ);
+
+    tmp = rb_funcall2(arg->src, s_readpartial, 1, &n);
+
+    check_load_arg(arg, s_read);
+    if (NIL_P(tmp)) {
+	too_short();
+    }
+    StringValue(tmp);
+
+    tmp_len = RSTRING_LEN(tmp);
+
+    if (tmp_len < need) {
+	/* retry */
+	VALUE fill = r_bytes1(need-tmp_len, arg);
+	rb_str_concat(tmp, fill);
+	tmp_len = RSTRING_LEN(tmp);
+    }
+
+    tmp_ptr = RSTRING_PTR(tmp);
+    arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
+    str = rb_str_new(arg->buf+arg->offset, buflen);
+    rb_str_cat(str, tmp_ptr, need);
+    if (tmp_len-need > 0)
+	memcpy(arg->buf, tmp_ptr+need, tmp_len-need);
+
+    arg->offset = 0;
+    arg->buflen = tmp_len - need;
+
+    return str;
+}
+
 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
 
 static VALUE
@@ -1105,19 +1185,21 @@ r_bytes0(long len, struct load_arg *arg)
 	    arg->offset += len;
 	}
 	else {
-	  too_short:
-	    rb_raise(rb_eArgError, "marshal data too short");
+	    too_short();
 	}
     }
     else {
-	VALUE src = arg->src;
-	VALUE n = LONG2NUM(len);
-	str = rb_funcall2(src, s_read, 1, &n);
-	check_load_arg(arg, s_read);
-	if (NIL_P(str)) goto too_short;
-	StringValue(str);
-	if (RSTRING_LEN(str) != len) goto too_short;
-	arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+	if (len <= arg->buflen) {
+	    str = rb_str_new(arg->buf+arg->offset, len);
+	    arg->offset += len;
+	    arg->buflen -= len;
+	}
+	else {
+	    if (arg->partial)
+		str = r_bytes1_partial(len, arg);
+	    else
+		str = r_bytes1(len, arg);
+	}
     }
     return str;
 }
@@ -1732,6 +1814,16 @@ r_object(struct load_arg *arg)
 static void
 clear_load_arg(struct load_arg *arg)
 {
+    if (arg->buf) {
+	if (arg->buflen) {
+	    VALUE str = rb_str_new(arg->buf+arg->offset, arg->buflen);
+	    arg->buflen = 0;
+	    rb_funcall2(arg->src, s_ungetc, 1, &str);
+	}
+	xfree(arg->buf);
+	arg->buf = 0;
+	arg->offset = 0;
+    }
     if (!arg->symbols) return;
     st_free_table(arg->symbols);
     arg->symbols = 0;
@@ -1767,7 +1859,7 @@ marshal_load(int argc, VALUE *argv)
 	infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */
 	port = v;
     }
-    else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
+    else if (rb_respond_to(port, s_ungetc) && rb_respond_to(port, s_read)) {
 	if (rb_respond_to(port, s_binmode)) {
 	    rb_funcall2(port, s_binmode, 0, 0);
 	}
@@ -1784,6 +1876,13 @@ marshal_load(int argc, VALUE *argv)
     arg->data    = st_init_numtable();
     arg->compat_tbl = st_init_numtable();
     arg->proc = 0;
+    arg->partial = 0;
+    arg->buf = 0;
+
+    if (NIL_P(v)) {
+	if (rb_respond_to(port, s_readpartial)) arg->partial = 1;
+	arg->buf = xmalloc(BUFSIZ);
+    }
 
     major = r_byte(arg);
     minor = r_byte(arg);
@@ -1919,8 +2018,9 @@ Init_marshal(void)
     s_load_data = rb_intern("_load_data");
     s_alloc = rb_intern("_alloc");
     s_call = rb_intern("call");
-    s_getbyte = rb_intern("getbyte");
+    s_ungetc = rb_intern("ungetc");
     s_read = rb_intern("read");
+    s_readpartial = rb_intern("readpartial");
     s_write = rb_intern("write");
     s_binmode = rb_intern("binmode");
 
diff --git i/test/ruby/test_marshal.rb w/test/ruby/test_marshal.rb
index 85cec0a..375d274 100644
--- i/test/ruby/test_marshal.rb
+++ w/test/ruby/test_marshal.rb
@@ -492,4 +492,14 @@ class TestMarshal < Test::Unit::TestCase
     assert_equal(Rational(1, 2), Marshal.load("\x04\bU:\rRational[\ai\x06i\a"))
     assert_raise(ArgumentError){Marshal.load("\x04\bU:\rRational[\bi\x00i\x00i\x00")}
   end
+
+  def test_successive
+    result = IO.pipe do |r, w|
+      Thread.start do
+        2.times {Marshal.dump(1, w)}
+      end
+      2.times.map {Marshal.load(r)}
+    end
+    assert_equal([1, 1], result, '[ruby-dev:45644]')
+  end
 end


-- 
--- 僕の前にBugはない。
--- 僕の後ろにBugはできる。
    中田 伸悦

In This Thread