[ruby-dev:45645] Re: [ruby-trunk - Feature #6440][Open] 引数にIOを渡した場合のMarshal.loadにバッファを持たせたい
From:
Nobuyoshi Nakada <nobu@...>
Date:
2012-05-17 14:09:27 UTC
List:
ruby-dev #45645
なかだです。
(12/05/17 18:21), Tanaka Akira wrote:
> バッファがあると、読みすぎることがあるのではないでしょうか。
>
> 読みすぎたぶんを捨ててしまうと、 marshal したデータが複数並んでいるものを読み込む場合に、 動作しなくなってしまうと思います。
ungetcするのはどうでしょうか。プロセスをまたいでしまうとやはりダメです
が、これは現状でもできないようですし。
diff --git i/marshal.c w/marshal.c
index e05b9f5..20b22ea 100644
--- i/marshal.c
+++ w/marshal.c
@@ -81,7 +81,7 @@ shortlen(long len, BDIGIT *ds)
static ID s_dump, s_load, s_mdump, s_mload;
static ID s_dump_data, s_load_data, s_alloc, s_call;
-static ID s_getbyte, s_read, s_write, s_binmode;
+static ID s_ungetc, s_read, s_readpartial, s_write, s_binmode;
typedef struct {
VALUE newclass;
@@ -958,7 +958,10 @@ marshal_dump(int argc, VALUE *argv)
struct load_arg {
VALUE src;
+ char *buf;
+ long buflen;
long offset;
+ int partial;
st_table *symbols;
st_table *data;
VALUE proc;
@@ -1011,6 +1014,13 @@ static VALUE r_object(struct load_arg *arg);
static ID r_symbol(struct load_arg *arg);
static VALUE path2class(VALUE path);
+NORETURN(static void too_short(void)); /* marked non-returning: the body only raises */
+static void
+too_short(void)
+{
+ rb_raise(rb_eArgError, "marshal data too short"); /* shared raise used by the truncated-input checks in r_byte, r_bytes0, r_bytes1 and r_bytes1_partial */
+}
+
static st_index_t
r_prepare(struct load_arg *arg)
{
@@ -1030,15 +1040,28 @@ r_byte(struct load_arg *arg)
c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
}
else {
- rb_raise(rb_eArgError, "marshal data too short");
+ too_short();
}
}
else {
- VALUE src = arg->src;
- VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
- check_load_arg(arg, s_getbyte);
- if (NIL_P(v)) rb_eof_error();
- c = (unsigned char)NUM2CHR(v);
+ if (arg->buflen == 0) {
+ VALUE str, n = LONG2NUM(BUFSIZ);
+
+ if (arg->partial)
+ str = rb_funcall2(arg->src, s_readpartial, 1, &n);
+ else
+ str = rb_funcall2(arg->src, s_read, 1, &n);
+
+ check_load_arg(arg, s_read);
+ if (NIL_P(str)) too_short();
+ StringValue(str);
+ arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+ memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
+ arg->offset = 0;
+ arg->buflen = RSTRING_LEN(str);
+ }
+ c = (unsigned char)arg->buf[arg->offset++];
+ arg->buflen--;
}
return c;
}
@@ -1091,6 +1114,63 @@ r_long(struct load_arg *arg)
return x;
}
+static VALUE /* returns the String obtained by calling read(len) on arg->src */
+r_bytes1(long len, struct load_arg *arg)
+{
+ VALUE str, n = LONG2NUM(len);
+
+ str = rb_funcall2(arg->src, s_read, 1, &n); /* goes straight to the source; arg->buf is not consulted. NOTE(review): r_bytes0 passes the full len here even when arg->buflen > 0, so buffered bytes look skipped -- confirm */
+ check_load_arg(arg, s_read);
+
+ if (NIL_P(str)) { /* nil from read is reported as truncated data */
+ too_short();
+ }
+ StringValue(str);
+ if (RSTRING_LEN(str) < len) too_short(); /* only a short result is rejected; a longer one passes through */
+
+ arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION); /* fold the string's MARSHAL_INFECTION flag bits into arg */
+
+ return str;
+}
+
+static VALUE /* returns a new String of len bytes: the bytes left in arg->buf followed by freshly read ones */
+r_bytes1_partial(long len, struct load_arg *arg)
+{
+ long buflen = arg->buflen;
+ long tmp_len, need = len - buflen; /* need = bytes still missing once the buffer is drained */
+ const char *tmp_ptr;
+ VALUE str, tmp, n = LONG2NUM(need > BUFSIZ ? need : BUFSIZ); /* request at least BUFSIZ so surplus can refill the buffer */
+
+ tmp = rb_funcall2(arg->src, s_readpartial, 1, &n);
+
+ check_load_arg(arg, s_read); /* NOTE(review): passes s_read although readpartial was just called -- confirm which ID is intended */
+ if (NIL_P(tmp)) {
+ too_short();
+ }
+ StringValue(tmp);
+
+ tmp_len = RSTRING_LEN(tmp);
+
+ if (tmp_len < need) {
+ /* readpartial returned less than needed: fetch the remainder through r_bytes1 (read) and append it */
+ VALUE fill = r_bytes1(need-tmp_len, arg);
+ rb_str_concat(tmp, fill);
+ tmp_len = RSTRING_LEN(tmp);
+ }
+
+ tmp_ptr = RSTRING_PTR(tmp);
+ arg->infection |= (int)FL_TEST(tmp, MARSHAL_INFECTION);
+ str = rb_str_new(arg->buf+arg->offset, buflen); /* start the result with the bytes still buffered */
+ rb_str_cat(str, tmp_ptr, need); /* then the first need bytes of the new data */
+ if (tmp_len-need > 0)
+ memcpy(arg->buf, tmp_ptr+need, tmp_len-need); /* keep the surplus for later reads. NOTE(review): no bound check against the BUFSIZ allocation; assumes readpartial never returns more than n bytes -- confirm */
+
+ arg->offset = 0; /* buffer now starts at its beginning and holds only the surplus */
+ arg->buflen = tmp_len - need;
+
+ return str;
+}
+
#define r_bytes(arg) r_bytes0(r_long(arg), (arg))
static VALUE
@@ -1105,19 +1185,21 @@ r_bytes0(long len, struct load_arg *arg)
arg->offset += len;
}
else {
- too_short:
- rb_raise(rb_eArgError, "marshal data too short");
+ too_short();
}
}
else {
- VALUE src = arg->src;
- VALUE n = LONG2NUM(len);
- str = rb_funcall2(src, s_read, 1, &n);
- check_load_arg(arg, s_read);
- if (NIL_P(str)) goto too_short;
- StringValue(str);
- if (RSTRING_LEN(str) != len) goto too_short;
- arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
+ if (len <= arg->buflen) {
+ str = rb_str_new(arg->buf+arg->offset, len);
+ arg->offset += len;
+ arg->buflen -= len;
+ }
+ else {
+ if (arg->partial)
+ str = r_bytes1_partial(len, arg);
+ else
+ str = r_bytes1(len, arg);
+ }
}
return str;
}
@@ -1732,6 +1814,16 @@ r_object(struct load_arg *arg)
static void
clear_load_arg(struct load_arg *arg)
{
+ if (arg->buf) {
+ if (arg->buflen) {
+ VALUE str = rb_str_new(arg->buf+arg->offset, arg->buflen);
+ arg->buflen = 0;
+ rb_funcall2(arg->src, s_ungetc, 1, &str);
+ }
+ xfree(arg->buf);
+ arg->buf = 0;
+ arg->offset = 0;
+ }
if (!arg->symbols) return;
st_free_table(arg->symbols);
arg->symbols = 0;
@@ -1767,7 +1859,7 @@ marshal_load(int argc, VALUE *argv)
infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */
port = v;
}
- else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
+ else if (rb_respond_to(port, s_ungetc) && rb_respond_to(port, s_read)) {
if (rb_respond_to(port, s_binmode)) {
rb_funcall2(port, s_binmode, 0, 0);
}
@@ -1784,6 +1876,13 @@ marshal_load(int argc, VALUE *argv)
arg->data = st_init_numtable();
arg->compat_tbl = st_init_numtable();
arg->proc = 0;
+ arg->partial = 0;
+ arg->buf = 0;
+
+ if (NIL_P(v)) {
+ if (rb_respond_to(port, s_readpartial)) arg->partial = 1;
+ arg->buf = xmalloc(BUFSIZ);
+ }
major = r_byte(arg);
minor = r_byte(arg);
@@ -1919,8 +2018,9 @@ Init_marshal(void)
s_load_data = rb_intern("_load_data");
s_alloc = rb_intern("_alloc");
s_call = rb_intern("call");
- s_getbyte = rb_intern("getbyte");
+ s_ungetc = rb_intern("ungetc");
s_read = rb_intern("read");
+ s_readpartial = rb_intern("readpartial");
s_write = rb_intern("write");
s_binmode = rb_intern("binmode");
diff --git i/test/ruby/test_marshal.rb w/test/ruby/test_marshal.rb
index 85cec0a..375d274 100644
--- i/test/ruby/test_marshal.rb
+++ w/test/ruby/test_marshal.rb
@@ -492,4 +492,14 @@ class TestMarshal < Test::Unit::TestCase
assert_equal(Rational(1, 2), Marshal.load("\x04\bU:\rRational[\ai\x06i\a"))
assert_raise(ArgumentError){Marshal.load("\x04\bU:\rRational[\bi\x00i\x00i\x00")}
end
+
+ def test_successive # two values dumped back-to-back into one pipe must load as two separate values
+ result = IO.pipe do |r, w|
+ Thread.start do
+ 2.times {Marshal.dump(1, w)} # writer side: two consecutive marshal streams
+ end
+ 2.times.map {Marshal.load(r)} # reader side: each load must stop at the end of its own stream
+ end
+ assert_equal([1, 1], result, '[ruby-dev:45644]')
+ end
end
--
--- 僕の前にBugはない。
--- 僕の後ろにBugはできる。
中田 伸悦