[#53944] [ruby-trunk - Bug #8210][Open] Multibyte character interfering with end-line character within a regex — "sawa (Tsuyoshi Sawada)" <sawadatsuyoshi@...>

14 messages 2013/04/03

[#53974] [ruby-trunk - Feature #8215][Open] Support accessing Fiber-locals and backtraces for a Fiber — "halorgium (Tim Carey-Smith)" <ruby-lang-bugs@...>

14 messages 2013/04/03

[#54095] [ruby-trunk - Feature #8237][Open] Logical method chaining via inferred receiver — "wardrop (Tom Wardrop)" <tom@...>

34 messages 2013/04/08

[#54138] [ruby-trunk - Bug #8241][Open] If uri host-part has underscore ( '_' ), 'URI#parse' raise 'URI::InvalidURIError' — "neocoin (Sangmin Ryu)" <neocoin@...>

9 messages 2013/04/09

[#54185] [CommonRuby - Feature #8257][Open] Exception#cause to carry originating exception along with new one — "headius (Charles Nutter)" <headius@...>

43 messages 2013/04/11

[#54196] Encouraging use of CommonRuby — Charles Oliver Nutter <headius@...>

I think we need to do more to encourage the use of the CommonRuby

20 messages 2013/04/11
[#54200] Re: Encouraging use of CommonRuby — Marc-Andre Lafortune <ruby-core-mailing-list@...> 2013/04/11

Hi,

[#54211] Re: Encouraging use of CommonRuby — "NARUSE, Yui" <naruse@...> 2013/04/12

As far as I understand, what is CommonRuby and the process over CommonRuby

[#54215] Re: Encouraging use of CommonRuby — Charles Oliver Nutter <headius@...> 2013/04/12

On Thu, Apr 11, 2013 at 11:25 PM, NARUSE, Yui <naruse@airemix.jp> wrote:

[#54207] [CommonRuby - Feature #8258][Open] Dir#escape_glob — "steveklabnik (Steve Klabnik)" <steve@...>

15 messages 2013/04/12

[#54218] [CommonRuby - Feature #8259][Open] Atomic attributes accessors — "funny_falcon (Yura Sokolov)" <funny.falcon@...>

43 messages 2013/04/12

[#54288] [CommonRuby - Feature #8271][Open] Proposal for moving to a more visible, formal process for feature requests — "headius (Charles Nutter)" <headius@...>

15 messages 2013/04/15

[#54333] Requesting Commit Access — Aman Gupta <ruby@...1.net>

Hello ruby-core,

16 messages 2013/04/16

[#54473] [Backport 200 - Backport #8299][Open] Minor error in float parsing — "bobjalex (Bob Alexander)" <bobjalex@...>

27 messages 2013/04/19

[#54532] [ruby-trunk - Bug #8315][Open] mkmf does not include include paths from pkg_config anymore — "Hanmac (Hans Mackowiak)" <hanmac@...>

11 messages 2013/04/23

[#54621] [ruby-trunk - Feature #8339][Open] Introducing Geneartional Garbage Collection for CRuby/MRI — "ko1 (Koichi Sasada)" <redmine@...>

43 messages 2013/04/27
[#54643] [ruby-trunk - Feature #8339] Introducing Geneartional Garbage Collection for CRuby/MRI — "authorNari (Narihiro Nakamura)" <authorNari@...> 2013/04/28

[#54649] Re: [ruby-trunk - Feature #8339] Introducing Geneartional Garbage Collection for CRuby/MRI — SASADA Koichi <ko1@...> 2013/04/28

(2013/04/28 9:23), authorNari (Narihiro Nakamura) wrote:

[#54657] Re: [ruby-trunk - Feature #8339][Open] Introducing Geneartional Garbage Collection for CRuby/MRI — Magnus Holm <judofyr@...> 2013/04/28

On Sat, Apr 27, 2013 at 8:19 PM, ko1 (Koichi Sasada)

[#54665] [ruby-trunk - Bug #8344][Open] Status of Psych and Syck — "Eregon (Benoit Daloze)" <redmine@...>

18 messages 2013/04/28

[ruby-core:53968] [ruby-trunk - Feature #8206] Should Ruby core implement String#blank?

From: "naruse (Yui NARUSE)" <naruse@...>
Date: 2013-04-03 17:55:41 UTC
List: ruby-core #53968
Issue #8206 has been updated by naruse (Yui NARUSE).


I came up with an idea: String#include? accepting a regexp, without setting the backref.
Could you try it and comment on it?

% ruby -e'p [$&," foo".include?(/[[:space:]]/),$&]'
[nil, true, nil]

diff --git a/re.c b/re.c
index 16d7e34..8c7d9de 100644
--- a/re.c
+++ b/re.c
@@ -1352,18 +1352,19 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
 }
 
 /* returns byte offset */
-long
-rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
+static long
+rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int backref)
 {
     long result;
     VALUE match;
-    struct re_registers regi, *regs = &regi;
+    struct re_registers regi;
+    struct re_registers *regs = NULL;
     char *range = RSTRING_PTR(str);
-    regex_t *reg;
+    regex_t *reg = NULL;
     int tmpreg;
 
     if (pos > RSTRING_LEN(str) || pos < 0) {
-	rb_backref_set(Qnil);
+	if (backref) rb_backref_set(Qnil);
 	return -1;
     }
 
@@ -1371,18 +1372,21 @@ rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
     tmpreg = reg != RREGEXP(re)->ptr;
     if (!tmpreg) RREGEXP(re)->usecnt++;
 
-    match = rb_backref_get();
-    if (!NIL_P(match)) {
-	if (FL_TEST(match, MATCH_BUSY)) {
-	    match = Qnil;
+    if (backref) {
+	regs = &regi;
+	match = rb_backref_get();
+	if (!NIL_P(match)) {
+	    if (FL_TEST(match, MATCH_BUSY)) {
+		match = Qnil;
+	    }
+	    else {
+		regs = RMATCH_REGS(match);
+	    }
 	}
-	else {
-	    regs = RMATCH_REGS(match);
+	if (NIL_P(match)) {
+	    MEMZERO(regs, struct re_registers, 1);
 	}
     }
-    if (NIL_P(match)) {
-	MEMZERO(regs, struct re_registers, 1);
-    }
     if (!reverse) {
 	range += RSTRING_LEN(str);
     }
@@ -1416,29 +1420,44 @@ rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
 	}
     }
 
-    if (NIL_P(match)) {
-	match = match_alloc(rb_cMatch);
-	onig_region_copy(RMATCH_REGS(match), regs);
-	onig_region_free(regs, 0);
-    }
-    else {
-	if (rb_safe_level() >= 3)
-	    OBJ_TAINT(match);
-	else
-	    FL_UNSET(match, FL_TAINT);
-    }
+    if (backref) {
+	if (NIL_P(match)) {
+	    match = match_alloc(rb_cMatch);
+	    onig_region_copy(RMATCH_REGS(match), regs);
+	    onig_region_free(regs, 0);
+	}
+	else {
+	    if (rb_safe_level() >= 3)
+		OBJ_TAINT(match);
+	    else
+		FL_UNSET(match, FL_TAINT);
+	}
 
-    RMATCH(match)->str = rb_str_new4(str);
-    RMATCH(match)->regexp = re;
-    RMATCH(match)->rmatch->char_offset_updated = 0;
-    rb_backref_set(match);
+	RMATCH(match)->str = rb_str_new4(str);
+	RMATCH(match)->regexp = re;
+	RMATCH(match)->rmatch->char_offset_updated = 0;
+	rb_backref_set(match);
 
-    OBJ_INFECT(match, re);
-    OBJ_INFECT(match, str);
+	OBJ_INFECT(match, re);
+	OBJ_INFECT(match, str);
+    }
 
     return result;
 }
 
+/* returns byte offset */
+long
+rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
+{
+    return rb_reg_search0(re, str, pos, reverse, TRUE);
+}
+
+long
+rb_reg_search_without_backref(VALUE re, VALUE str, long pos, int reverse)
+{
+    return rb_reg_search0(re, str, pos, reverse, FALSE);
+}
+
 VALUE
 rb_reg_nth_defined(int nth, VALUE match)
 {
diff --git a/string.c b/string.c
index 8bbd8a4..64d53be 100644
--- a/string.c
+++ b/string.c
@@ -4335,6 +4335,7 @@ rb_str_reverse_bang(VALUE str)
     return str;
 }
 
+long rb_reg_search_without_backref(VALUE re, VALUE str, long pos, int reverse);
 
 /*
  *  call-seq:
@@ -4353,8 +4354,13 @@ rb_str_include(VALUE str, VALUE arg)
 {
     long i;
 
-    StringValue(arg);
-    i = rb_str_index(str, arg, 0);
+    if (RB_TYPE_P(arg, T_REGEXP)) {
+	i = rb_reg_search_without_backref(arg, str, 0, FALSE);
+    }
+    else {
+	StringValue(arg);
+	i = rb_str_index(str, arg, 0);
+    }
 
     if (i == -1) return Qfalse;
     return Qtrue;
----------------------------------------
Feature #8206: Should Ruby core implement String#blank? 
https://bugs.ruby-lang.org/issues/8206#change-38187

Author: sam.saffron (Sam Saffron)
Status: Open
Priority: Normal
Assignee: 
Category: core
Target version: 


There has been some discussion about porting the #blank? protocol over to Ruby in the past that has been rejected by Matz. 

This proposal is only about String however. 

At the moment, to figure out if you have a blank string you would write:

"  ".strip.length == 0

The disadvantage is that this forces unneeded allocations and does too much work: 

An optimal implementation would be:

/*
 * Returns Qtrue when +str+ is empty, or when every code point decoded from
 * it is either whitespace according to rb_isspace() or NUL (0).
 * Returns Qfalse as soon as any other code point is encountered.
 */
static VALUE
rb_str_blank(VALUE str)
{
  rb_encoding *enc = STR_ENC_GET(str);
  char *cur = RSTRING_PTR(str);
  char *end;

  if (!cur || RSTRING_LEN(str) == 0) return Qtrue;

  for (end = RSTRING_END(str); cur < end; ) {
    int len;
    unsigned int c = rb_enc_codepoint_len(cur, end, &len, enc);

    /* Anything that is neither whitespace nor NUL makes the string non-blank. */
    if (!(rb_isspace(c) || c == 0)) return Qfalse;

    /* Step over the bytes consumed by this code point. */
    cur += len;
  }
  return Qtrue;
}

This in turn is about 5-8x faster than the regex solution to the problem, and way faster than allocating one massive string with strip when the length is large. 

Should Ruby take on this method, to accompany #strip, following its practice?

--- 

A slight caveat, though, is that Active Support has a somewhat different definition of blank:

/*
 * Code points that rb_str_blank_as() accepts as blank.
 * The entries are listed in ascending order; the lookup loop in
 * rb_str_blank_as() stops scanning as soon as it meets an entry greater
 * than the code point being tested, so this order must be preserved.
 * The declared length (26) is also hard-coded as that loop's bound.
 */
const unsigned int as_blank[26] = {9, 0xa, 0xb, 0xc, 0xd,
  0x20, 0x85, 0xa0, 0x1680, 0x180e, 0x2000, 0x2001,
  0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
  0x2009, 0x200a, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000
};

/*
 * Returns Qtrue when +str+ is empty, or when every code point decoded from
 * it appears in the as_blank table. Returns Qfalse at the first code point
 * that is not in the table.
 */
static VALUE
rb_str_blank_as(VALUE str)
{
  rb_encoding *enc = STR_ENC_GET(str);
  char *cur = RSTRING_PTR(str);
  char *end;

  if (!cur || RSTRING_LEN(str) == 0) return Qtrue;

  end = RSTRING_END(str);
  while (cur < end) {
    int len;
    int idx;
    int matched = 0;
    unsigned int c = rb_enc_codepoint_len(cur, end, &len, enc);

    /*
     * Linear scan of the table. Because as_blank is in ascending order,
     * the scan can stop once an entry larger than c is reached.
     */
    for (idx = 0; idx < 26 && as_blank[idx] <= c; idx++) {
      if (as_blank[idx] == c) {
        matched = 1;
        break;
      }
    }

    if (!matched) return Qfalse;

    /* Step over the bytes consumed by this code point. */
    cur += len;
  }
  return Qtrue;
}

Clearly it makes no sense to have such a method. 

If Ruby took over implementing String#blank?, it would clash with Active Support, but IMHO it would enforce better API consistency. 

Thoughts?


 


-- 
http://bugs.ruby-lang.org/

In This Thread