[ruby-dev:31710] optimize range space
From:
Tanaka Akira <akr@...>
Date:
2007-09-02 06:13:18 UTC
List:
ruby-dev #31710
Range のメモリ消費を節約するのはどうでしょうか。
Range は現在 T_OBJECT で、ハッシュを使って情報を保持します。
しかし、通常、begin, end, excl の 3つのインスタンス変数しか
使いません。
そうすると、メモリは 3 word あれば十分で、RVALUE に埋め込め
るはずですが、ハッシュを使っているため、st_table および bins
を外部に確保することになっています。
(st での bins への埋め込みを行う以前は、さらに
struct st_table_entry が 3つ必要でした)
これはあまりに無駄なので Range を T_STRUCT にして埋め込んで
みたのですがどうでしょうか。
ただ、T_STRUCT に変えると、marshal が問題になります。
marshal の互換性は保持したいので、内部の実体とは異なる形で
dump/load できるような仕掛けを marshal 側に入れて、marshal
したデータは T_OBJECT になるようにしました。
Index: eval_method.ci
===================================================================
--- eval_method.ci (revision 13328)
+++ eval_method.ci (working copy)
@@ -192,6 +192,19 @@
rb_add_method(CLASS_OF(klass), ID_ALLOCATOR, 0, NOEX_UNDEF);
}
+rb_alloc_func_t
+rb_get_alloc_func(VALUE klass) /* Return the C allocator function registered for klass, or 0 when none can be found. */
+{
+ NODE *n;
+ Check_Type(klass, T_CLASS); /* klass is required to be a T_CLASS object */
+ n = rb_method_node(CLASS_OF(klass), ID_ALLOCATOR); /* the allocator is stored as the ID_ALLOCATOR method on CLASS_OF(klass) */
+ if (!n) return 0;
+ if (nd_type(n) != NODE_METHOD) return 0;
+ n = n->nd_body;
+ if (nd_type(n) != NODE_CFUNC) return 0; /* only allocators backed by a C function are reported */
+ return n->nd_cfnc;
+}
+
static NODE *
search_method(VALUE klass, ID id, VALUE *klassp)
{
Index: include/ruby/intern.h
===================================================================
--- include/ruby/intern.h (revision 13328)
+++ include/ruby/intern.h (working copy)
@@ -218,8 +218,10 @@
#define rb_disable_super(klass, name) ((void)0)
#define rb_enable_super(klass, name) ((void)0)
#define HAVE_RB_DEFINE_ALLOC_FUNC 1
-void rb_define_alloc_func(VALUE, VALUE (*)(VALUE));
+typedef VALUE (*rb_alloc_func_t)(VALUE);
+void rb_define_alloc_func(VALUE, rb_alloc_func_t);
void rb_undef_alloc_func(VALUE);
+rb_alloc_func_t rb_get_alloc_func(VALUE);
void rb_clear_cache(void);
void rb_clear_cache_by_class(VALUE);
void rb_alias(VALUE, ID, ID);
@@ -355,6 +357,7 @@
/* marshal.c */
VALUE rb_marshal_dump(VALUE, VALUE);
VALUE rb_marshal_load(VALUE);
+void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE));
/* numeric.c */
void rb_num_zerodiv(void);
VALUE rb_num_coerce_bin(VALUE, VALUE);
Index: range.c
===================================================================
--- range.c (revision 13328)
+++ range.c (working copy)
@@ -15,10 +15,34 @@
VALUE rb_cRange;
static ID id_cmp, id_succ, id_beg, id_end, id_excl;
-#define EXCL(r) RTEST(rb_ivar_get((r), id_excl))
-#define SET_EXCL(r,v) rb_ivar_set((r), id_excl, (v) ? Qtrue : Qfalse)
+#define RANGE_BEG(r) (RSTRUCT(r)->as.ary[0])
+#define RANGE_END(r) (RSTRUCT(r)->as.ary[1])
+#define RANGE_EXCL(r) (RSTRUCT(r)->as.ary[2])
+#define EXCL(r) RTEST(RANGE_EXCL(r))
+#define SET_EXCL(r,v) (RSTRUCT(r)->as.ary[2] = (v) ? Qtrue : Qfalse)
+
+#define FL_INITIALIZED FL_USER3
+
static VALUE
+range_alloc(VALUE klass) /* Allocator for Range: a T_STRUCT object with three embedded slots and FL_INITIALIZED cleared. */
+{
+ long n;
+ NEWOBJ(r, struct RStruct);
+ OBJSETUP(r, klass, T_STRUCT);
+
+ n = 3; /* slot 0 = begin, 1 = end, 2 = excl (see RANGE_BEG / RANGE_END / RANGE_EXCL) */
+
+ RBASIC(r)->flags &= ~RSTRUCT_EMBED_LEN_MASK;
+ RBASIC(r)->flags |= n << RSTRUCT_EMBED_LEN_SHIFT; /* store the embedded slot count in the flags word */
+ rb_mem_clear(r->as.ary, n); /* clear the n embedded slots */
+
+ RBASIC(r)->flags &= ~FL_INITIALIZED; /* the `initialize' implementation tests and sets this flag */
+
+ return (VALUE)r;
+}
+
+static VALUE
range_failed(void)
{
rb_raise(rb_eArgError, "bad value for range");
@@ -48,8 +72,8 @@
}
SET_EXCL(range, exclude_end);
- rb_ivar_set(range, id_beg, beg);
- rb_ivar_set(range, id_end, end);
+ RSTRUCT(range)->as.ary[0] = beg;
+ RSTRUCT(range)->as.ary[1] = end;
}
VALUE
@@ -77,9 +101,10 @@
rb_scan_args(argc, argv, "21", &beg, &end, &flags);
/* Ranges are immutable, so that they should be initialized only once. */
- if (rb_ivar_defined(range, id_beg)) {
+ if (RBASIC(range)->flags & FL_INITIALIZED) {
rb_name_error(rb_intern("initialize"), "`initialize' called twice");
}
+ RBASIC(range)->flags |= FL_INITIALIZED;
range_init(range, beg, end, RTEST(flags));
return Qnil;
}
@@ -121,9 +146,9 @@
if (!rb_obj_is_instance_of(obj, rb_obj_class(range)))
return Qfalse;
- if (!rb_equal(rb_ivar_get(range, id_beg), rb_ivar_get(obj, id_beg)))
+ if (!rb_equal(RANGE_BEG(range), RANGE_BEG(obj)))
return Qfalse;
- if (!rb_equal(rb_ivar_get(range, id_end), rb_ivar_get(obj, id_end)))
+ if (!rb_equal(RANGE_END(range), RANGE_END(obj)))
return Qfalse;
if (EXCL(range) != EXCL(obj))
@@ -183,9 +208,9 @@
if (!rb_obj_is_instance_of(obj, rb_obj_class(range)))
return Qfalse;
- if (!rb_eql(rb_ivar_get(range, id_beg), rb_ivar_get(obj, id_beg)))
+ if (!rb_eql(RANGE_BEG(range), RANGE_BEG(obj)))
return Qfalse;
- if (!rb_eql(rb_ivar_get(range, id_end), rb_ivar_get(obj, id_end)))
+ if (!rb_eql(RANGE_END(range), RANGE_END(obj)))
return Qfalse;
if (EXCL(range) != EXCL(obj))
@@ -209,9 +234,9 @@
long hash = EXCL(range);
VALUE v;
- v = rb_hash(rb_ivar_get(range, id_beg));
+ v = rb_hash(RANGE_BEG(range));
hash ^= v << 1;
- v = rb_hash(rb_ivar_get(range, id_end));
+ v = rb_hash(RANGE_END(range));
hash ^= v << 9;
hash ^= EXCL(range) << 24;
@@ -289,8 +314,8 @@
RETURN_ENUMERATOR(range, argc, argv);
- b = rb_ivar_get(range, id_beg);
- e = rb_ivar_get(range, id_end);
+ b = RANGE_BEG(range);
+ e = RANGE_END(range);
if (rb_scan_args(argc, argv, "01", &step) == 0) {
step = INT2FIX(1);
}
@@ -382,8 +407,8 @@
RETURN_ENUMERATOR(range, 0, 0);
- beg = rb_ivar_get(range, id_beg);
- end = rb_ivar_get(range, id_end);
+ beg = RANGE_BEG(range);
+ end = RANGE_END(range);
if (!rb_respond_to(beg, id_succ)) {
rb_raise(rb_eTypeError, "can't iterate from %s",
@@ -423,7 +448,7 @@
static VALUE
range_first(VALUE range)
{
- return rb_ivar_get(range, id_beg);
+ return RANGE_BEG(range);
}
@@ -442,7 +467,7 @@
static VALUE
range_last(VALUE range)
{
- return rb_ivar_get(range, id_end);
+ return RANGE_END(range);
}
/*
@@ -464,8 +489,8 @@
return rb_call_super(0, 0);
}
else {
- VALUE b = rb_ivar_get(range, id_beg);
- VALUE e = rb_ivar_get(range, id_end);
+ VALUE b = RANGE_BEG(range);
+ VALUE e = RANGE_END(range);
int c = rb_cmpint(rb_funcall(b, id_cmp, 1, e), b, e);
if (c > 0 || (c == 0 && EXCL(range)))
@@ -489,14 +514,14 @@
static VALUE
range_max(VALUE range)
{
- VALUE e = rb_ivar_get(range, id_end);
+ VALUE e = RANGE_END(range);
int ip = FIXNUM_P(e) || rb_obj_is_kind_of(e, rb_cInteger);
if (rb_block_given_p() || (EXCL(range) && !ip)) {
return rb_call_super(0, 0);
}
else {
- VALUE b = rb_ivar_get(range, id_beg);
+ VALUE b = RANGE_BEG(range);
int c = rb_cmpint(rb_funcall(b, id_cmp, 1, e), b, e);
if (c > 0)
@@ -519,8 +544,8 @@
long beg, end, excl;
if (rb_obj_is_kind_of(range, rb_cRange)) {
- b = rb_ivar_get(range, id_beg);
- e = rb_ivar_get(range, id_end);
+ b = RANGE_BEG(range);
+ e = RANGE_END(range);
excl = EXCL(range);
}
else {
@@ -578,8 +603,8 @@
{
VALUE str, str2;
- str = rb_obj_as_string(rb_ivar_get(range, id_beg));
- str2 = rb_obj_as_string(rb_ivar_get(range, id_end));
+ str = rb_obj_as_string(RANGE_BEG(range));
+ str2 = rb_obj_as_string(RANGE_END(range));
str = rb_str_dup(str);
rb_str_cat(str, "...", EXCL(range) ? 3 : 2);
rb_str_append(str, str2);
@@ -616,8 +641,8 @@
{
VALUE str, str2;
- str = rb_inspect(rb_ivar_get(range, id_beg));
- str2 = rb_inspect(rb_ivar_get(range, id_end));
+ str = rb_inspect(RANGE_BEG(range));
+ str2 = rb_inspect(RANGE_END(range));
str = rb_str_dup(str);
rb_str_cat(str, "...", EXCL(range) ? 3 : 2);
rb_str_append(str, str2);
@@ -668,8 +693,8 @@
static VALUE
range_include(VALUE range, VALUE val)
{
- VALUE beg = rb_ivar_get(range, id_beg);
- VALUE end = rb_ivar_get(range, id_end);
+ VALUE beg = RANGE_BEG(range);
+ VALUE end = RANGE_END(range);
int nv = FIXNUM_P(beg) || FIXNUM_P(end) ||
rb_obj_is_kind_of(beg, rb_cNumeric) ||
rb_obj_is_kind_of(end, rb_cNumeric);
@@ -711,8 +736,8 @@
{
VALUE beg, end;
- beg = rb_ivar_get(range, id_beg);
- end = rb_ivar_get(range, id_end);
+ beg = RANGE_BEG(range);
+ end = RANGE_END(range);
if (r_le(beg, val)) {
if (EXCL(range)) {
if (r_lt(val, end))
@@ -726,7 +751,34 @@
return Qfalse;
}
+static VALUE
+range_dumper(VALUE range) /* Marshal compat dumper: build a T_OBJECT of class Object holding the range's begin/end/excl as instance variables. */
+{
+ VALUE v;
+ NEWOBJ(m, struct RObject);
+ OBJSETUP(m, rb_cObject, T_OBJECT);
+ v = (VALUE)m;
+
+ rb_ivar_set(v, id_excl, EXCL(range) ? Qtrue : Qfalse); /* excl is normalised to Qtrue/Qfalse */
+ rb_ivar_set(v, id_beg, RANGE_BEG(range));
+ rb_ivar_set(v, id_end, RANGE_END(range));
+ return v; /* the stand-in object, not the range itself */
+}
+
+static VALUE
+range_loader(VALUE range, VALUE obj) /* Marshal compat loader: copy the begin/end/excl ivars of obj into range's slots and return range. */
+{
+ if (TYPE(obj) != T_OBJECT || RBASIC(obj)->klass != rb_cObject) { /* obj must be the plain Object stand-in; anything else is rejected */
+ rb_raise(rb_eTypeError, "not a dumped range object");
+ }
+ RSTRUCT(range)->as.ary[0] = rb_ivar_get(obj, id_beg);
+ RSTRUCT(range)->as.ary[1] = rb_ivar_get(obj, id_end);
+ SET_EXCL(range, RTEST(rb_ivar_get(obj, id_excl)));
+ RBASIC(range)->flags |= FL_INITIALIZED; /* a loaded range is complete: a later `initialize' call must be refused, not overwrite it */
+ return range;
+}
+
/* A <code>Range</code> represents an interval---a set of values with a
* start and an end. Ranges may be constructed using the
* <em>s</em><code>..</code><em>e</em> and
@@ -782,8 +834,25 @@
void
Init_Range(void)
{
+ VALUE members;
+
+ id_cmp = rb_intern("<=>");
+ id_succ = rb_intern("succ");
+ id_beg = rb_intern("begin");
+ id_end = rb_intern("end");
+ id_excl = rb_intern("excl");
+
rb_cRange = rb_define_class("Range", rb_cObject);
+
+ /* compatibility for rb_struct_members, etc. */
+ members = rb_ary_new3(3, ID2SYM(id_beg), ID2SYM(id_end), ID2SYM(id_excl));
+ OBJ_FREEZE(members);
+ rb_iv_set(rb_cRange, "__size__", INT2FIX(3));
+ rb_iv_set(rb_cRange, "__members__", members);
+
+ rb_define_alloc_func(rb_cRange, range_alloc);
rb_include_module(rb_cRange, rb_mEnumerable);
+ rb_marshal_define_compat(rb_cRange, rb_cObject, range_dumper, range_loader);
rb_define_method(rb_cRange, "initialize", range_initialize, -1);
rb_define_method(rb_cRange, "==", range_eq, 1);
rb_define_method(rb_cRange, "===", range_eqq, 1);
@@ -807,9 +876,4 @@
rb_define_method(rb_cRange, "include?", range_include, 1);
rb_define_method(rb_cRange, "cover?", range_cover, 1);
- id_cmp = rb_intern("<=>");
- id_succ = rb_intern("succ");
- id_beg = rb_intern("begin");
- id_end = rb_intern("end");
- id_excl = rb_intern("excl");
}
Index: inits.c
===================================================================
--- inits.c (revision 13328)
+++ inits.c (working copy)
@@ -76,6 +76,7 @@
Init_Struct();
Init_Regexp();
Init_pack();
+ Init_marshal();
Init_Range();
Init_IO();
Init_Dir();
@@ -88,7 +89,6 @@
Init_Binding();
Init_Math();
Init_GC();
- Init_marshal();
Init_Enumerator();
Init_VM();
Init_ISeq();
Index: marshal.c
===================================================================
--- marshal.c (revision 13328)
+++ marshal.c (working copy)
@@ -82,12 +82,45 @@
static ID s_dump_data, s_load_data, s_alloc;
static ID s_getc, s_read, s_write, s_binmode;
+typedef struct { /* one Marshal compatibility registration, created by rb_marshal_define_compat */
+ VALUE newclass; /* class whose instances are marshaled through a stand-in object */
+ VALUE oldclass; /* class allocated for the stand-in while loading */
+ VALUE (*dumper)(VALUE); /* called with the real object; returns the stand-in to dump */
+ VALUE (*loader)(VALUE, VALUE); /* called with (real object, loaded stand-in) to fill in the real object */
+} marshal_compat_t;
+
+static st_table *compat_allocator_tbl;
+
+void
+rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE)) /* Register dumper/loader callbacks for newclass, keyed by newclass's allocator function. */
+{
+ marshal_compat_t *compat;
+ rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
+
+ if (!allocator) { /* the table is keyed by allocator, so newclass must have one */
+ rb_raise(rb_eTypeError, "no allocator");
+ }
+
+ compat = ALLOC(marshal_compat_t);
+ compat->newclass = Qnil; /* give both slots a valid value before their addresses are registered */
+ compat->oldclass = Qnil;
+ rb_gc_register_address(&compat->newclass);
+ rb_gc_register_address(&compat->oldclass);
+ compat->newclass = newclass;
+ compat->oldclass = oldclass;
+ compat->dumper = dumper;
+ compat->loader = loader;
+
+ st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat); /* compat_allocator_tbl must already have been created by the marshal init code */
+}
+
struct dump_arg {
VALUE obj;
VALUE str, dest;
st_table *symbols;
st_table *data;
int taint;
+ st_table *compat_tbl;
};
struct dump_call_arg {
@@ -363,8 +396,13 @@
{
volatile VALUE p;
char *path;
+ VALUE real_obj;
+ VALUE klass;
- VALUE klass = CLASS_OF(obj);
+ if (st_lookup(arg->compat_tbl, (st_data_t)obj, (st_data_t*)&real_obj)) {
+ obj = real_obj;
+ }
+ klass = CLASS_OF(obj);
w_extended(klass, arg, check);
w_byte(type, arg);
p = class2path(rb_class_real(klass));
@@ -459,6 +497,19 @@
if (OBJ_TAINTED(obj)) arg->taint = Qtrue;
st_add_direct(arg->data, obj, arg->data->num_entries);
+
+ {
+ marshal_compat_t *compat;
+ rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
+ if (st_lookup(compat_allocator_tbl,
+ (st_data_t)allocator,
+ (st_data_t*)&compat)) {
+ VALUE real_obj = obj;
+ obj = compat->dumper(real_obj);
+ st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
+ }
+ }
+
if (rb_respond_to(obj, s_mdump)) {
VALUE v;
@@ -720,6 +771,7 @@
arg.symbols = st_init_numtable();
arg.data = st_init_numtable();
arg.taint = Qfalse;
+ arg.compat_tbl = st_init_numtable();
c_arg.obj = obj;
c_arg.arg = &arg;
c_arg.limit = limit;
@@ -739,6 +791,7 @@
VALUE data;
VALUE proc;
int taint;
+ st_table *compat_tbl;
};
static VALUE r_entry(VALUE v, struct load_arg *arg);
@@ -899,14 +952,41 @@
static VALUE
r_entry(VALUE v, struct load_arg *arg)
{
- rb_hash_aset(arg->data, INT2FIX(RHASH_SIZE(arg->data)), v);
- if (arg->taint) OBJ_TAINT(v);
+ VALUE real_obj = Qundef;
+ if (st_lookup(arg->compat_tbl, v, (st_data_t*)&real_obj)) {
+ rb_hash_aset(arg->data, INT2FIX(RHASH_SIZE(arg->data)), real_obj);
+ }
+ else {
+ rb_hash_aset(arg->data, INT2FIX(RHASH_SIZE(arg->data)), v);
+ }
+ if (arg->taint) {
+ OBJ_TAINT(v);
+ if (real_obj != Qundef)
+ OBJ_TAINT(real_obj);
+ }
if (arg->proc) {
v = rb_funcall(arg->proc, rb_intern("call"), 1, v);
}
return v;
}
+static VALUE
+r_leave(VALUE v, struct load_arg *arg) /* Finish loading v: if v is a compat stand-in, run the loader and return the real object; otherwise return v. */
+{
+ VALUE real_obj;
+ marshal_compat_t *compat;
+ if (st_lookup(arg->compat_tbl, v, (st_data_t*)&real_obj)) { /* v was registered as a stand-in by obj_alloc_by_path */
+ rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
+ st_data_t key = v;
+ if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, (st_data_t*)&compat)) {
+ compat->loader(real_obj, v); /* move the loaded state from the stand-in into the real object */
+ }
+ st_delete(arg->compat_tbl, &key, 0); /* the stand-in is finished; drop its mapping */
+ return real_obj;
+ }
+ return v;
+}
+
static void
r_ivar(VALUE obj, struct load_arg *arg)
{
@@ -945,6 +1025,26 @@
}
static VALUE
+obj_alloc_by_path(const char *path, struct load_arg *arg) /* Allocate an instance of the class named by path; for compat-registered classes return a stand-in of oldclass instead. */
+{
+ VALUE klass;
+ marshal_compat_t *compat;
+ rb_alloc_func_t allocator;
+
+ klass = path2class(path);
+
+ allocator = rb_get_alloc_func(klass);
+ if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, (st_data_t*)&compat)) {
+ VALUE real_obj = rb_obj_alloc(klass);
+ VALUE obj = rb_obj_alloc(compat->oldclass); /* stand-in that is returned to the caller in place of real_obj */
+ st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); /* lets r_entry / r_leave map the stand-in back to real_obj */
+ return obj;
+ }
+
+ return rb_obj_alloc(klass);
+}
+
+static VALUE
r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
{
VALUE v = Qnil;
@@ -1049,6 +1149,7 @@
}
v = rb_float_new(d);
v = r_entry(v, arg);
+ v = r_leave(v, arg);
}
break;
@@ -1094,11 +1195,13 @@
}
v = rb_big_norm((VALUE)big);
v = r_entry(v, arg);
+ v = r_leave(v, arg);
}
break;
case TYPE_STRING:
v = r_entry(r_string(arg), arg);
+ v = r_leave(v, arg);
break;
case TYPE_REGEXP:
@@ -1106,6 +1209,7 @@
volatile VALUE str = r_bytes(arg);
int options = r_byte(arg);
v = r_entry(rb_reg_new(str, options), arg);
+ v = r_leave(v, arg);
}
break;
@@ -1118,6 +1222,7 @@
while (len--) {
rb_ary_push(v, r_object(arg));
}
+ v = r_leave(v, arg);
}
break;
@@ -1136,6 +1241,7 @@
if (type == TYPE_HASH_DEF) {
RHASH(v)->ifnone = r_object(arg);
}
+ v = r_leave(v, arg);
}
break;
@@ -1170,6 +1276,7 @@
}
rb_struct_aset(v, LONG2FIX(i), r_object(arg));
}
+ v = r_leave(v, arg);
}
break;
@@ -1189,6 +1296,7 @@
}
v = rb_funcall(klass, s_load, 1, data);
v = r_entry(v, arg);
+ v = r_leave(v, arg);
}
break;
@@ -1211,19 +1319,19 @@
v = r_entry(v, arg);
data = r_object(arg);
rb_funcall(v, s_mload, 1, data);
+ v = r_leave(v, arg);
}
break;
case TYPE_OBJECT:
{
- VALUE klass = path2class(r_unique(arg));
-
- v = rb_obj_alloc(klass);
+ v = obj_alloc_by_path(r_unique(arg), arg);
if (TYPE(v) != T_OBJECT) {
rb_raise(rb_eArgError, "dump format error");
}
v = r_entry(v, arg);
r_ivar(v, arg);
+ v = r_leave(v, arg);
}
break;
@@ -1251,6 +1359,7 @@
rb_class2name(klass));
}
rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod));
+ v = r_leave(v, arg);
}
break;
@@ -1260,6 +1369,7 @@
v = rb_path2class(RSTRING_PTR(str));
v = r_entry(v, arg);
+ v = r_leave(v, arg);
}
break;
@@ -1269,6 +1379,7 @@
v = path2class(RSTRING_PTR(str));
v = r_entry(v, arg);
+ v = r_leave(v, arg);
}
break;
@@ -1278,6 +1389,7 @@
v = path2module(RSTRING_PTR(str));
v = r_entry(v, arg);
+ v = r_leave(v, arg);
}
break;
@@ -1350,6 +1462,7 @@
}
arg.src = port;
arg.offset = 0;
+ arg.compat_tbl = st_init_numtable();
major = r_byte(&arg);
minor = r_byte(&arg);
@@ -1429,6 +1542,8 @@
rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
+
+ compat_allocator_tbl = st_init_numtable();
}
VALUE
--
[田中 哲][たなか あきら][Tanaka Akira]