Re: [Clenup, fixes] ruby.h, string.c
From:
matz@... (Yukihiro Matsumoto)
Date:
2002-09-09 07:19:19 UTC
List:
ruby-core #455
Hi,
In message "[Clenup, fixes] ruby.h, string.c"
on 02/09/07, Michal Rokos <m.rokos@sh.cvut.cz> writes:
|--- ruby.h 2002/09/03 05:20:06 1.69
|+++ ruby.h 2002/09/06 16:09:25
|@@ -316,7 +316,7 @@ struct RString {
| long len;
| char *ptr;
| union {
|- int capa;
|+ long capa;
| VALUE shared;
| } aux;
| };
|@@ -325,7 +325,7 @@ struct RArray {
| struct RBasic basic;
| long len;
| union {
|- int capa;
|+ long capa;
| VALUE shared;
| } aux;
| VALUE *ptr;
|
|Nothing to say. len is LONG -> capa should be LONG as well.
OK.
|diff -u -p -r1.117 string.c
|--- string.c 2002/09/03 05:20:06 1.117
|+++ string.c 2002/09/06 16:09:30
|@@ -61,18 +61,18 @@ str_new(klass, ptr, len)
| if (len < 0) {
| rb_raise(rb_eArgError, "negative string size (or size too big)");
| }
|-
| str = rb_obj_alloc(klass);
| RSTRING(str)->len = len;
| RSTRING(str)->aux.capa = len;
| RSTRING(str)->ptr = ALLOC_N(char,len+1);
|+
| if (ptr) {
| memcpy(RSTRING(str)->ptr, ptr, len);
|+ RSTRING(str)->ptr[len] = '\0';
| }
| else {
|- MEMZERO(RSTRING(str)->ptr, char, len);
|+ MEMZERO(RSTRING(str)->ptr, char, len+1);
| }
|- RSTRING(str)->ptr[len] = '\0';
| return str;
| }
|
|Useless move of sentinel to memcpy block and make MEMZERO go to make
|sentinel for us.
OK.
|@@ -217,7 +217,7 @@ rb_str_shared_replace(str, str2)
| VALUE str, str2;
| {
| if (str == str2) return;
|- if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
|+ if (str_independent(str)) free(RSTRING(str)->ptr);
| if (NIL_P(str2)) {
| RSTRING(str)->ptr = 0;
| RSTRING(str)->len = 0;
|@@ -236,7 +236,7 @@ rb_str_shared_replace(str, str2)
| RSTRING(str2)->ptr = 0; /* abandon str2 */
| RSTRING(str2)->len = 0;
| RSTRING(str2)->aux.capa = 0;
|- FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
|+ FL_UNSET(str2, ELTS_SHARED|STR_ASSOC);
| if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
| }
|
|I think that we should test if we can modify str before doing replace.
|BUGFIX: Flags should be unset for str2, not for str!
str2 for rb_str_shared_replace() is always a brand-new clean string.
It's ensured, so that we don't need any extra check here. And we
shouldn't have cleared flags for str. We should fix this.
|@@ -249,17 +249,11 @@ rb_str_associate(str, add)
| rb_ary_concat(RSTRING(str)->aux.shared, add);
| }
| else {
|- if (FL_TEST(str, ELTS_SHARED)) {
|- rb_str_modify(str);
|- }
|- else if (RSTRING(str)->aux.shared) {
|- /* str_buf */
|- if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
|- RESIZE_CAPA(str, RSTRING(str)->len);
|- }
|+ rb_str_modify(str);
|+ if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
|+ RESIZE_CAPA(str, RSTRING(str)->len);
| }
| RSTRING(str)->aux.shared = add;
|- FL_UNSET(str, ELTS_SHARED);
| FL_SET(str, STR_ASSOC);
| }
| }
|
|I think that rb_str_modify should be called if the str isn't already
|associated -> no need to clean SHARED flag then.
Probably both you and me were wrong. Since rb_str_associate() just
associate internal data to the string, we don't need to check freeze
flag at all. We just need to abandon shared flag and associate data
if required.
|@@ -398,13 +392,21 @@ rb_str_format(str, arg)
| return rb_f_sprintf(2, argv);
| }
|
|-static int
|-str_independent(str)
|+void
|+rb_str_modify_check(str)
| VALUE str;
| {
| if (OBJ_FROZEN(str)) rb_error_frozen("string");
|- if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
|+ if (!OBJ_TAINTED(str) && rb_safe_level() >= 4) {
| rb_raise(rb_eSecurityError, "Insecure: can't modify string");
|+ }
|+}
|+
|+static int
|+str_independent(str)
|+ VALUE str;
|+{
|+ rb_str_modify_check(str);
| if (!FL_TEST(str, ELTS_SHARED)) return 1;
| return 0;
| }
|
|New check and use it where it was before.
I'm not sure if we need to separate these functions; see below.
|@@ -414,14 +416,18 @@ str_make_independent(str)
| VALUE str;
| {
| char *ptr;
|+ long len = RSTRING(str)->len;
|
|- ptr = ALLOC_N(char, RSTRING(str)->len+1);
|+ ptr = ALLOC_N(char, len+1);
| if (RSTRING(str)->ptr) {
|- memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
|+ memcpy(ptr, RSTRING(str)->ptr, len);
|+ ptr[len] = '\0';
|+ }
|+ else {
|+ MEMZERO(ptr, char, len+1);
| }
|- ptr[RSTRING(str)->len] = 0;
| RSTRING(str)->ptr = ptr;
|- RSTRING(str)->aux.capa = RSTRING(str)->len;
|+ RSTRING(str)->aux.capa = len;
| FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
| }
|
|Just a little cleanup and use MEMZERO for nice output.
OK.
|@@ -429,8 +435,9 @@ void
| rb_str_modify(str)
| VALUE str;
| {
|- if (!str_independent(str))
|+ if (!str_independent(str)) {
| str_make_independent(str);
|+ }
| }
|
| VALUE
|
|Ha - indent only... Sorry.
OK.
|@@ -493,10 +500,6 @@ VALUE
| rb_str_dup_frozen(str)
| VALUE str;
| {
|- if (FL_TEST(str, ELTS_SHARED)) {
|- OBJ_FREEZE(RSTRING(str)->aux.shared);
|- return RSTRING(str)->aux.shared;
|- }
| if (OBJ_FROZEN(str)) return str;
| str = rb_str_dup(str);
| OBJ_FREEZE(str);
|
|I don't understand here. This function should dup and return freezed
|result, or freeze str and dup it?
rb_str_dup() is used internally for hash keys only, so that if we can
avoid copying, we should to gain performance. I guess we should leave
this as it is.
|@@ -511,15 +514,17 @@ rb_str_resize(str, len)
| if (len < 0) {
| rb_raise(rb_eArgError, "negative string size (or size too big)");
| }
|-
|+ rb_str_modify(str);
|+
|+ if (FL_TEST(str, STR_ASSOC)) {
|+ rb_bug("Discarding ASSOC in rb_str_resize");
|+ }
| if (len != RSTRING(str)->len) {
|- rb_str_modify(str);
|-
| if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
| RESIZE_CAPA(str, len);
| }
|+ MEMZERO(RSTRING(str)->ptr, char, len+1);
| RSTRING(str)->len = len;
|- RSTRING(str)->ptr[len] = '\0'; /* sentinel */
| }
| return str;
| }
|
|I think that modify check should be done. (Added a nice check about
|discarding ASSOC. :-))
rb_bug() is too much. We just need to discard STR_ASSOC flag and
associated data. Besides since rb_str_resize() is not a function to
implement a method, we don't need to call rb_str_modify() unless it
is really required. I think we should leave this as it is as well.
|@@ -532,8 +537,14 @@ rb_str_buf_cat(str, ptr, len)
| {
| long capa, total;
|
|- if (FL_TEST(str, ELTS_SHARED)) {
|- rb_str_modify(str);
|+ if (len == 0) return str;
|+ if (len < 0) {
|+ rb_raise(rb_eArgError, "negative string size (or size too big)");
|+ }
|+ rb_str_modify(str);
|+
|+ if (FL_TEST(str, STR_ASSOC)) {
|+ rb_bug("String with ASSOC in rb_str_buf_cat!");
| }
| capa = RSTRING(str)->aux.capa;
| total = RSTRING(str)->len+len;
|
|Again: Modify and rb_bug when ASSOCiated.
See the comment above.
|@@ -544,8 +555,8 @@ rb_str_buf_cat(str, ptr, len)
| RESIZE_CAPA(str, capa);
| }
| memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
|- RSTRING(str)->len = total;
| RSTRING(str)->ptr[total] = '\0'; /* sentinel */
|+ RSTRING(str)->len = total;
|
| return str;
| }
|
|Ahh, indent again, sorry.
OK.
|@@ -564,23 +575,24 @@ rb_str_cat(str, ptr, len)
| const char *ptr;
| long len;
| {
|+ if (len < 0) {
|+ rb_raise(rb_eArgError, "negative string size (or size too big)");
|+ }
| rb_str_modify(str);
|- if (len > 0) {
|- if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
|- return rb_str_buf_cat(str, ptr, len);
|- }
|- RESIZE_CAPA(str, RSTRING(str)->len + len);
|+
|+ if (FL_TEST(str, STR_ASSOC)) {
|+ REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len+1);
| if (ptr) {
| memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
|+ RSTRING(str)->ptr[RSTRING(str)->len + len] = '\0'; /* sentinel */
| }
| else {
|- MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len);
|+ MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len+1);
| }
| RSTRING(str)->len += len;
|- RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
|+ return str;
| }
|-
|- return str;
|+ return rb_str_buf_cat(str, ptr, len);
| }
|
| VALUE
|
|Cleaned a bit.
|Fix: Don't use RESIZE_CAPA when STR_ASSOC is set.
|
|PS: I forgot to handle len == 0, will do.
We just need to discard.
|@@ -597,12 +609,14 @@ rb_str_buf_append(str, str2)
| {
| long capa, len;
|
|- if (FL_TEST(str, ELTS_SHARED)) {
|- rb_str_modify(str);
|+ rb_str_modify(str);
|+
|+ if (FL_TEST(str, STR_ASSOC)) {
|+ rb_bug("String with ASSOC in rb_str_buf_append!");
| }
| capa = RSTRING(str)->aux.capa;
|-
| len = RSTRING(str)->len+RSTRING(str2)->len;
|+
| if (capa <= len) {
| while (len > capa) {
| capa = (capa + 1) * 2;
|
|Again: Modify and bug when...
See the comments above.
|@@ -625,21 +639,21 @@ rb_str_append(str, str2)
|
| StringValue(str2);
| rb_str_modify(str);
|+
| if (RSTRING(str2)->len > 0) {
| len = RSTRING(str)->len+RSTRING(str2)->len;
|- if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
|+ if (FL_TEST(str, STR_ASSOC)) {
|+ REALLOC_N(RSTRING(str)->ptr, char, len+1);
|+ memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
|+ RSTRING(str2)->ptr, RSTRING(str2)->len);
|+ RSTRING(str)->ptr[len] = '\0'; /* sentinel */
|+ RSTRING(str)->len = len;
|+ }
|+ else {
| rb_str_buf_append(str, str2);
|- OBJ_INFECT(str, str2);
|- return str;
| }
|- RESIZE_CAPA(str, len);
|- memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
|- RSTRING(str2)->ptr, RSTRING(str2)->len);
|- RSTRING(str)->len += RSTRING(str2)->len;
|- RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
| }
| OBJ_INFECT(str, str2);
|-
| return str;
| }
|
|A little cleanup: Don't need to check SHARED flag (we did rb_str_modify)
|Fix: Don't use RESIZE_CAPA when ASSOC flag is set.
Ditto.
|@@ -1493,6 +1507,9 @@ str_gsub(argc, argv, str, bang)
| char *buf, *bp, *cp;
| int tainted = 0;
|
|+ if (bang) {
|+ rb_str_modify_check(str);
|+ }
| if (argc == 1 && rb_block_given_p()) {
| iter = 1;
| }
|
|When bang is set, modify_check should be done?
It's already checked in the function.
|@@ -1617,6 +1634,7 @@ rb_str_replace(str, str2)
| {
| if (str == str2) return str;
|
|+ rb_str_modify_check(str);
| StringValue(str2);
| if (FL_TEST(str2, ELTS_SHARED)) {
| if (str_independent(str)) {
|
|Modify_check?
It's already checked in the function.
|@@ -1624,16 +1642,15 @@ rb_str_replace(str, str2)
| }
| RSTRING(str)->len = RSTRING(str2)->len;
| RSTRING(str)->ptr = RSTRING(str2)->ptr;
|- FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
| RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
|+ FL_SET(str, ELTS_SHARED);
| }
| else {
|- rb_str_modify(str);
| rb_str_resize(str, RSTRING(str2)->len);
| memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
| if (FL_TEST(str2, STR_ASSOC)) {
|- FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
| RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
|+ FL_SET(str, STR_ASSOC);
| }
| }
|
|rb_str_resize now performs rb_str_modify. No need to read flags from
|str2, we already tested them.
Only if we do the modify above.
matz.