From: "drbrain (Eric Hodel)" Date: 2012-03-23T13:00:43+09:00 Subject: [ruby-core:43570] [ruby-trunk - Bug #6192] Integer() doesn't handle UTF-16 input Issue #6192 has been updated by drbrain (Eric Hodel). =begin I made this patch: Index: bignum.c =================================================================== --- bignum.c (revision 35117) +++ bignum.c (working copy) @@ -11,6 +11,7 @@ #include "ruby/ruby.h" #include "ruby/util.h" +#include "ruby/encoding.h" #include "internal.h" #include <math.h> @@ -24,6 +25,7 @@ VALUE rb_cBignum; static VALUE big_three = Qnil; +static VALUE sym_replace = Qnil; #if defined __MINGW32__ #define USHORT _USHORT @@ -773,8 +775,21 @@ rb_str_to_inum(VALUE str, int base, int long len; VALUE v = 0; VALUE ret; + VALUE encopts; + rb_encoding *enc; StringValue(str); + + enc = rb_enc_from_index(ENCODING_GET((str))); + + if (enc != rb_usascii_encoding()) { + encopts = rb_hash_new(); + rb_hash_aset(encopts, sym_replace, rb_str_new2(" ")); + rb_obj_freeze(encopts); + + str = rb_str_conv_enc_opts(str, enc, rb_usascii_encoding(), 0, encopts); + } + if (badcheck) { s = StringValueCStr(str); } @@ -3809,5 +3824,6 @@ Init_Bignum(void) power_cache_init(); big_three = rb_uint2big(3); + sym_replace = ID2SYM(rb_intern("replace")); rb_gc_register_mark_object(big_three); } Index: test/ruby/test_literal.rb =================================================================== --- test/ruby/test_literal.rb (revision 35117) +++ test/ruby/test_literal.rb (working copy) @@ -261,6 +261,23 @@ class TestRubyLiteral < Test::Unit::Test } end + def test_integer_encoding + bug6192 = '[bug#6192]' + + s = "2007".encode(Encoding::UTF_16LE) + + assert_equal(2007, Integer(s), bug6192) + + s = "3.14 is \xCF\x80" + s.force_encoding Encoding::UTF_8 + + e = assert_raises(ArgumentError, bug6192) do + Integer(s) + end + + assert_equal("Invalid value for Integer(): \"#{s}\"", e.message) + end + def test_float head = ['', '-', '+'] chars = ['0', '1', '_', '9', 'f', '.'] But there is a 
problem: 1) Failure: test_integer_utf_16(TestRubyLiteral) [/Users/drbrain/Work/svn/ruby/trunk/test/ruby/test_literal.rb:278]: <"Invalid value for Integer(): \"3.14 is π\""> expected but was <"invalid value for Integer(): \"3.14 is \\xCF\\x80\"">. I'm not sure if this output is acceptable or not. =end ---------------------------------------- Bug #6192: Integer() doesn't handle UTF-16 input https://bugs.ruby-lang.org/issues/6192#change-25064 Author: john_firebaugh (John Firebaugh) Status: Open Priority: Normal Assignee: Category: Target version: ruby -v: ruby 1.9.3p125 (2012-02-16 revision 34643) [x86_64-darwin11.3.0] >> Integer("2007".encode("UTF-16le")) ArgumentError: string contains null byte from (irb):209:in `Integer' from (irb):209 from /Users/john/.rvm/rubies/ruby-1.9.3-p125/bin/irb:16:in `<main>'
-- http://bugs.ruby-lang.org/