[#30995] [Bug #3523] win32 exception c0000029 on exit using fibers — B Kelly <redmine@...>

Bug #3523: win32 exception c0000029 on exit using fibers

19 messages 2010/07/02

[#31100] [rubysoc] Queue C-extension patch to come — Ricardo Panaggio <panaggio.ricardo@...>

Hello,

26 messages 2010/07/07
[#31148] Re: [rubysoc] Queue C-extension patch to come — Roger Pack <rogerdpack2@...> 2010/07/09

> As this is my first patch to Ruby, I don't know where to begin with.

[#31320] Re: [rubysoc] Queue C-extension patch to come — Ricardo Panaggio <panaggio.ricardo@...> 2010/07/16

Sorry for leaving this thread for so long. I've tried to finish the

[#31322] Re: [rubysoc] Queue C-extension patch to come — Aaron Patterson <aaron@...> 2010/07/16

On Sat, Jul 17, 2010 at 06:55:35AM +0900, Ricardo Panaggio wrote:

[#31324] Re: [rubysoc] Queue C-extension patch to come — Caleb Clausen <vikkous@...> 2010/07/17

NB: I am Ricardo's mentor for this project.

[#31331] Re: [rubysoc] Queue C-extension patch to come — Benoit Daloze <eregontp@...> 2010/07/17

On 17 July 2010 06:00, Caleb Clausen <vikkous@gmail.com> wrote:

[#31332] Re: [rubysoc] Queue C-extension patch to come — Caleb Clausen <vikkous@...> 2010/07/17

On 7/17/10, Benoit Daloze <eregontp@gmail.com> wrote:

[#31138] Why is there no standard way of creating a String from a char *? — Nikolai Weibull <now@...>

Hi!

14 messages 2010/07/08
[#31146] Re: Why is there no standard way of creating a String from a char *? — Urabe Shyouhei <shyouhei@...> 2010/07/09

(2010/07/09 7:04), Nikolai Weibull wrote:

[#31149] Re: Why is there no standard way of creating a String from a char *? — Nikolai Weibull <now@...> 2010/07/09

On Fri, Jul 9, 2010 at 06:20, Urabe Shyouhei <shyouhei@ruby-lang.org> wrote:

[#31150] Re: Why is there no standard way of creating a String from a char *? — Urabe Shyouhei <shyouhei@...> 2010/07/09

(2010/07/09 18:28), Nikolai Weibull wrote:

[#31217] [Bug #3562] regression in respond_to? — Aaron Patterson <redmine@...>

Bug #3562: regression in respond_to?

14 messages 2010/07/12

[#31269] [Bug #3566] memory leak when spawning+joining Threads in a loop — Eric Wong <redmine@...>

Bug #3566: memory leak when spawning+joining Threads in a loop

14 messages 2010/07/13

[#31399] [Backport #3595] There's no encoding to differentiate a stream of Binary data from an 8-Bit ASCII string — Dreamcat Four <redmine@...>

Backport #3595: There's no encoding to differentiate a stream of Binary data from an 8-Bit ASCII string

17 messages 2010/07/21

[#31459] [Bug #3607] [trunk/r28731] Gem.path has disappeared? — Ollivier Robert <redmine@...>

Bug #3607: [trunk/r28731] Gem.path has disappeared?

22 messages 2010/07/23

[#31519] [Bug #3622] Net::HTTP does not wait to send request body with Expect: 100-continue — Eric Hodel <redmine@...>

Bug #3622: Net::HTTP does not wait to send request body with Expect: 100-continue

9 messages 2010/07/28

[ruby-core:31515] Re: [Feature #3619] \x{XXXX} as an escape sequence of string

From: Nobuyoshi Nakada <nobu@...>
Date: 2010-07-27 22:38:16 UTC
List: ruby-core #31515
Hi,

At Tue, 27 Jul 2010 22:21:31 +0900,
Heesob Park wrote in [ruby-core:31512]:
> I noticed that String#inspect produces \x{XXXX} for encodings other than Unicode.
> 
> Is there any possibility that \x{XXXX} is accepted as an escape sequence of string?
> 
> irb(main):004:0> a = "\xC7\xD1\xB1\xDB"

This is in binary representation.

> irb(main):010:0> a[1]
> => "\x{B1DB}"

But this is in codepoint representation.

I'm afraid it may confuse users.


diff --git a/parse.y b/parse.y
index ba52135..ec13fb6 100644
--- a/parse.y
+++ b/parse.y
@@ -5456,8 +5456,8 @@ parser_tok_hex(struct parser_params *parser, size_t *numlen)
 #define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n))
 
 static int
-parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
-                   int string_literal, int symbol_literal, int regexp_literal)
+parser_tokadd_multibyte(struct parser_params *parser, rb_encoding **encp, int enctype,
+			int string_literal, int symbol_literal, int regexp_literal)
 {
     /*
      * If string_literal is true, then we allow multiple codepoints
@@ -5466,22 +5466,28 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
      * codepoint without adding it
      */
 
-    int codepoint;
-    size_t numlen;
+    int codepoint, unicode_p = enctype == 'u', mblen;
+    size_t numlen, maxlen;
+    char errmsg[64];
+    const char *encname = unicode_p ? "Unicode" : (*encp)->name;
 
-    if (regexp_literal) { tokadd('\\'); tokadd('u'); }
+    if (regexp_literal) { tokadd('\\'); tokadd(enctype); }
 
     if (peek('{')) {  /* handle \u{...} form */
+	maxlen = unicode_p ? 6 : 4;
 	do {
             if (regexp_literal) { tokadd(*lex_p); }
 	    nextc();
-	    codepoint = scan_hex(lex_p, 6, &numlen);
+	    codepoint = scan_hex(lex_p, maxlen, &numlen);
 	    if (numlen == 0)  {
-		yyerror("invalid Unicode escape");
+		snprintf(errmsg, sizeof(errmsg), "invalid %s escape", encname);
+		yyerror(errmsg);
 		return 0;
 	    }
-	    if (codepoint > 0x10ffff) {
-		yyerror("invalid Unicode codepoint (too large)");
+	    mblen = ONIGENC_CODE_TO_MBCLEN(unicode_p ? UTF8_ENC() : *encp, codepoint);
+	    if (!MBCLEN_CHARFOUND_P(mblen)) {
+		snprintf(errmsg, sizeof(errmsg), "invalid %s codepoint", encname);
+		yyerror(errmsg);
 		return 0;
 	    }
 	    lex_p += numlen;
@@ -5489,7 +5495,7 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
                 tokcopy((int)numlen);
             }
             else if (codepoint >= 0x80) {
-		*encp = UTF8_ENC();
+		if (unicode_p) *encp = UTF8_ENC();
 		if (string_literal) tokaddmbc(codepoint, *encp);
 	    }
 	    else if (string_literal) {
@@ -5506,16 +5512,18 @@ parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp,
 	nextc();
     }
     else {			/* handle \uxxxx form */
-	codepoint = scan_hex(lex_p, 4, &numlen);
-	if (numlen < 4) {
-	    yyerror("invalid Unicode escape");
+	maxlen = unicode_p ? 4 : 2;
+	codepoint = scan_hex(lex_p, maxlen, &numlen);
+	if (numlen < maxlen) {
+	    snprintf(errmsg, sizeof(errmsg), "invalid %s escape", encname);
+	    yyerror(errmsg);
 	    return 0;
 	}
-	lex_p += 4;
+	lex_p += numlen;
         if (regexp_literal) {
-            tokcopy(4);
+            tokcopy(numlen);
         }
-	else if (codepoint >= 0x80) {
+	else if (codepoint >= 0x80 && unicode_p) {
 	    *encp = UTF8_ENC();
 	    if (string_literal) tokaddmbc(codepoint, *encp);
 	}
@@ -5570,6 +5578,9 @@ parser_read_escape(struct parser_params *parser, int flags,
 	return c;
 
       case 'x':	/* hex constant */
+	if (peek('{')) {
+	    
+	}
 	c = tok_hex(&numlen);
 	if (numlen == 0) return 0;
 	return c;
@@ -5825,13 +5836,14 @@ parser_tokadd_string(struct parser_params *parser,
 		break;
 
 	      case 'u':
+	      case 'x':
 		if ((func & STR_FUNC_EXPAND) == 0) {
 		    tokadd('\\');
 		    break;
 		}
-		parser_tokadd_utf8(parser, &enc, 1,
-				   func & STR_FUNC_SYMBOL,
-                                   func & STR_FUNC_REGEXP);
+		parser_tokadd_multibyte(parser, &enc, c, 1,
+					func & STR_FUNC_SYMBOL,
+					func & STR_FUNC_REGEXP);
 		if (has_nonascii && enc != *encp) {
 		    mixed_escape(beg, enc, *encp);
 		}
@@ -6855,9 +6867,9 @@ parser_yylex(struct parser_params *parser)
 	    goto ternary;
 	}
         else if (c == '\\') {
-            if (peek('u')) {
-                nextc();
-                c = parser_tokadd_utf8(parser, &enc, 0, 0, 0);
+	    c = nextc();
+            if (c == 'u' || c == 'x') {
+                c = parser_tokadd_multibyte(parser, &enc, c, 0, 0, 0);
                 if (0x80 <= c) {
                     tokaddmbc(c, enc);
                 }
@@ -6866,6 +6878,7 @@ parser_yylex(struct parser_params *parser)
                 }
             }
             else {
+		pushback(c);
                 c = read_escape(0, &enc);
                 tokadd(c);
             }
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index 07cda75..28996ab 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -159,6 +159,9 @@ class TestM17N < Test::Unit::TestCase
     assert_encoding("EUC-JP", eval(e(%{"\\x20"})).encoding)
     assert_encoding("EUC-JP", eval(e(%{"\\n"})).encoding)
     assert_encoding("EUC-JP", eval(e(%{"\\x80"})).encoding)
+    str = eval(e(%{"\\x{a1a1}"}))
+    assert_encoding("EUC-JP", str.encoding)
+    assert_equal(0xa1a1, str.ord)
   end
 
   def test_utf8_literal


-- 
Nobu Nakada

In This Thread