From: eregontp@... Date: 2017-07-04T10:10:09+00:00 Subject: [ruby-core:81898] [Ruby trunk Feature#13712] String#start_with? with regexp Issue #13712 has been updated by Eregon (Benoit Daloze). Agreed, this would be great and intuitive. I wonder, could the symmetrical String#end_with? also work with a Regexp? (having the same effect as a trailing \z in the Regexp) ---------------------------------------- Feature #13712: String#start_with? with regexp https://bugs.ruby-lang.org/issues/13712#change-65625 * Author: naruse (Yui NARUSE) * Status: Open * Priority: Normal * Assignee: * Target version: ---------------------------------------- String#start_with? should accept a regexp. When I write a parser, I want to check whether a string starts with a pattern or not. It's the same thing as [StringScanner#match?](https://ruby-doc.org/stdlib-2.4.0/libdoc/strscan/rdoc/StringScanner.html#method-i-match-3F). If I want to do the same thing with a normal string method, I need to write something like `/\A#{re}/.match(…)`. But if re is an argument, this needs to create a new temporary regexp every time. We have a workaround as follows, but it's a bit tricky. 
```ruby "foo ".rindex(/fo+./, 0) ``` A patch follows: ```diff diff --git a/re.c b/re.c index d0aa2a792e..f672ba75ec 100644 --- a/re.c +++ b/re.c @@ -1588,6 +1588,84 @@ rb_reg_search(VALUE re, VALUE str, long pos, int reverse) return rb_reg_search0(re, str, pos, reverse, 1); } +bool +rb_reg_start_with_p(VALUE re, VALUE str) +{ + long pos = 0; + long result; + VALUE match; + struct re_registers regi, *regs = &regi; + regex_t *reg; + int tmpreg; + onig_errmsg_buffer err = ""; + + reg = rb_reg_prepare_re0(re, str, err); + tmpreg = reg != RREGEXP_PTR(re); + if (!tmpreg) RREGEXP(re)->usecnt++; + + match = rb_backref_get(); + if (!NIL_P(match)) { + if (FL_TEST(match, MATCH_BUSY)) { + match = Qnil; + } + else { + regs = RMATCH_REGS(match); + } + } + if (NIL_P(match)) { + MEMZERO(regs, struct re_registers, 1); + } + result = onig_match(reg, + (UChar*)(RSTRING_PTR(str)), + ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)), + (UChar*)(RSTRING_PTR(str)), + regs, ONIG_OPTION_NONE); + if (!tmpreg) RREGEXP(re)->usecnt--; + if (tmpreg) { + if (RREGEXP(re)->usecnt) { + onig_free(reg); + } + else { + onig_free(RREGEXP_PTR(re)); + RREGEXP_PTR(re) = reg; + } + } + if (result < 0) { + if (regs == &regi) + onig_region_free(regs, 0); + if (result == ONIG_MISMATCH) { + rb_backref_set(Qnil); + return false; + } + else { + onig_error_code_to_str((UChar*)err, (int)result); + rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); + } + } + + if (NIL_P(match)) { + int err; + match = match_alloc(rb_cMatch); + err = rb_reg_region_copy(RMATCH_REGS(match), regs); + onig_region_free(regs, 0); + if (err) rb_memerror(); + } + else { + FL_UNSET(match, FL_TAINT); + } + + RMATCH(match)->str = rb_str_new4(str); + OBJ_INFECT(match, str); + + RMATCH(match)->regexp = re; + RMATCH(match)->rmatch->char_offset_updated = 0; + rb_backref_set(match); + + OBJ_INFECT(match, re); + + return true; +} + VALUE rb_reg_nth_defined(int nth, VALUE match) { diff --git a/string.c b/string.c index 
072f1329ee..6542a4acb1 100644 --- a/string.c +++ b/string.c @@ -9126,6 +9126,7 @@ rb_str_rpartition(VALUE str, VALUE sep) RSTRING_LEN(str)-pos-RSTRING_LEN(sep))); } +extern bool rb_reg_start_with_p(VALUE re, VALUE str); /* * call-seq: * str.start_with?([prefixes]+) -> true or false @@ -9146,11 +9147,20 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str) for (i=0; i