From: matthew@...
Date: 2017-07-05T10:11:18+00:00
Subject: [ruby-core:81915] [Ruby trunk Feature#13712] String#start_with?	with regexp

Issue #13712 has been updated by phluid61 (Matthew Kerwin).


Eregon (Benoit Daloze) wrote:
> 
> It might be quite useful when parsing, to avoid doing a second match just to get captures.

That could depend on whether `$&`, `$1`, `$2`, etc. are set.  I assumed @nobu was only asking about `$~` because allocating a whole MatchData object is heavier than just allocating some strings.

----------------------------------------
Feature #13712: String#start_with? with regexp
https://bugs.ruby-lang.org/issues/13712#change-65643

* Author: naruse (Yui NARUSE)
* Status: Open
* Priority: Normal
* Assignee: 
* Target version: 
----------------------------------------
String#start_with? should accept a regexp.

When I write a parser, I want to check whether a string starts with a pattern.
This is the same thing that [StringScanner#match?](https://ruby-doc.org/stdlib-2.4.0/libdoc/strscan/rdoc/StringScanner.html#method-i-match-3F) does.

If I want to do the same thing with a normal string method, I need to write something like `/\A#{re}/.match(...)`.
But if re is an argument, this has to create a new temporary regexp every time.

We do have the following workaround, but it's a bit tricky.

```ruby
"foo ".rindex(/fo+./, 0)
```

A patch follows:

```diff
diff --git a/re.c b/re.c
index d0aa2a792e..f672ba75ec 100644
--- a/re.c
+++ b/re.c
@@ -1588,6 +1588,84 @@ rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
     return rb_reg_search0(re, str, pos, reverse, 1);
 }
 
+bool
+rb_reg_start_with_p(VALUE re, VALUE str)
+{
+    long pos = 0;
+    long result;
+    VALUE match;
+    struct re_registers regi, *regs = &regi;
+    regex_t *reg;
+    int tmpreg;
+    onig_errmsg_buffer err = "";
+
+    reg = rb_reg_prepare_re0(re, str, err);
+    tmpreg = reg != RREGEXP_PTR(re);
+    if (!tmpreg) RREGEXP(re)->usecnt++;
+
+    match = rb_backref_get();
+    if (!NIL_P(match)) {
+	if (FL_TEST(match, MATCH_BUSY)) {
+	    match = Qnil;
+	}
+	else {
+	    regs = RMATCH_REGS(match);
+	}
+    }
+    if (NIL_P(match)) {
+	MEMZERO(regs, struct re_registers, 1);
+    }
+    result = onig_match(reg,
+			 (UChar*)(RSTRING_PTR(str)),
+			 ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)),
+			 (UChar*)(RSTRING_PTR(str)),
+			 regs, ONIG_OPTION_NONE);
+    if (!tmpreg) RREGEXP(re)->usecnt--;
+    if (tmpreg) {
+	if (RREGEXP(re)->usecnt) {
+	    onig_free(reg);
+	}
+	else {
+	    onig_free(RREGEXP_PTR(re));
+	    RREGEXP_PTR(re) = reg;
+	}
+    }
+    if (result < 0) {
+	if (regs == &regi)
+	    onig_region_free(regs, 0);
+	if (result == ONIG_MISMATCH) {
+	    rb_backref_set(Qnil);
+	    return false;
+	}
+	else {
+	    onig_error_code_to_str((UChar*)err, (int)result);
+	    rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
+	}
+    }
+
+    if (NIL_P(match)) {
+	int err;
+	match = match_alloc(rb_cMatch);
+	err = rb_reg_region_copy(RMATCH_REGS(match), regs);
+	onig_region_free(regs, 0);
+	if (err) rb_memerror();
+    }
+    else {
+	FL_UNSET(match, FL_TAINT);
+    }
+
+    RMATCH(match)->str = rb_str_new4(str);
+    OBJ_INFECT(match, str);
+
+    RMATCH(match)->regexp = re;
+    RMATCH(match)->rmatch->char_offset_updated = 0;
+    rb_backref_set(match);
+
+    OBJ_INFECT(match, re);
+
+    return true;
+}
+
 VALUE
 rb_reg_nth_defined(int nth, VALUE match)
 {
diff --git a/string.c b/string.c
index 072f1329ee..6542a4acb1 100644
--- a/string.c
+++ b/string.c
@@ -9126,6 +9126,7 @@ rb_str_rpartition(VALUE str, VALUE sep)
 					RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
 }
 
+extern bool rb_reg_start_with_p(VALUE re, VALUE str);
 /*
  *  call-seq:
  *     str.start_with?([prefixes]+)   -> true or false
@@ -9146,11 +9147,20 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str)
 
     for (i=0; i<argc; i++) {
 	VALUE tmp = argv[i];
-	StringValue(tmp);
-	rb_enc_check(str, tmp);
-	if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
-	if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
-	    return Qtrue;
+	switch (BUILTIN_TYPE(tmp)) {
+	  case T_REGEXP:
+	    {
+		bool r = rb_reg_start_with_p(tmp, str);
+		if (r) return Qtrue;
+	    }
+	    break;
+	  default:
+	    StringValue(tmp);
+	    rb_enc_check(str, tmp);
+	    if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
+	    if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
+		return Qtrue;
+	}
     }
     return Qfalse;
 }
```



-- 
https://bugs.ruby-lang.org/

Unsubscribe: <mailto:ruby-core-request@ruby-lang.org?subject=unsubscribe>
<http://lists.ruby-lang.org/cgi-bin/mailman/options/ruby-core>