ruby-core

Hi,

At Mon, 11 Apr 2005 22:00:37 +0900,
Sean E. Russell wrote in [ruby-core:04695]:
> Carp.  I also didn't notice that you didn't patch some of the encodings 
> (CP-1252, ISO-8859-15) so I need to fix those, too.  I don't have time this 
> morning, but I'll do it this evening (UTC-5).

They were not there when I posted the patch.


Index: lib/rexml/encodings/CP-1252.rb
===================================================================
RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/CP-1252.rb,v
retrieving revision 1.1
diff -U2 -p -r1.1 CP-1252.rb
--- lib/rexml/encodings/CP-1252.rb	16 May 2004 15:19:22 -0000	1.1
+++ lib/rexml/encodings/CP-1252.rb	17 May 2004 02:55:48 -0000
@@ -4,95 +4,67 @@
 module REXML
   module Encoding
-    @@__REXML_encoding_methods = %q~
+    UTF8_CP1252_TABLE = {
+      0x20AC => 0x80, # 0xe2 0x82 0xac
+      0x201A => 0x82, # 0xe2 0x80 0x9a
+      0x0192 => 0x83, # 0xc6 0x92
+      0x201E => 0x84, # 0xe2 0x80 0x9e
+      0x2026 => 0x85, # 0xe2 0x80 0xa6
+      0x2020 => 0x86, # 0xe2 0x80 0xa0
+      0x2021 => 0x87, # 0xe2 0x80 0xa1
+      0x02C6 => 0x88, # 0xcb 0x86
+      0x2030 => 0x89, # 0xe2 0x80 0xb0
+      0x0160 => 0x8A, # 0xc5 0xa0
+      0x2039 => 0x8B, # 0xe2 0x80 0xb9
+      0x0152 => 0x8C, # 0xc5 0x92
+      0x017D => 0x8E, # 0xc5 0xbd
+      0x2018 => 0x91, # 0xe2 0x80 0x98
+      0x2019 => 0x92, # 0xe2 0x80 0x99
+      0x201C => 0x93, # 0xe2 0x80 0x9c
+      0x201D => 0x94, # 0xe2 0x80 0x9d
+      0x2022 => 0x95, # 0xe2 0x80 0xa2
+      0x2013 => 0x96, # 0xe2 0x80 0x93
+      0x2014 => 0x97, # 0xe2 0x80 0x94
+      0x02DC => 0x98, # 0xcb 0x9c
+      0x2122 => 0x99, # 0xe2 0x84 0xa2
+      0x0161 => 0x9A, # 0xc5 0xa1
+      0x203A => 0x9B, # 0xe2 0x80 0xba
+      0x0153 => 0x9C, # 0xc5 0x93
+      0x017E => 0x9E, # 0xc5 0xbe
+      0x0178 => 0x9F, # 0xc5 0xb8
+    }
+    CP1252_UTF8_TABLE = UTF8_CP1252_TABLE.invert
+
     # Convert from UTF-8
-    def encode content
-      array_utf8 = content.unpack('U*')
-      array_enc = []
-      array_utf8.each do |num|
-        case num
-          # shortcut first bunch basic characters
-        when 0..0xFF: array_enc << num
-          # characters added compared to iso-8859-1
-        when 0x20AC: array_enc << 0x80 # 0xe2 0x82 0xac
-        when 0x201A: array_enc << 0x82 # 0xe2 0x82 0x9a
-        when 0x0192: array_enc << 0x83 # 0xc6 0x92
-        when 0x201E: array_enc << 0x84 # 0xe2 0x82 0x9e
-        when 0x2026: array_enc << 0x85 # 0xe2 0x80 0xa6
-        when 0x2020: array_enc << 0x86 # 0xe2 0x80 0xa0
-        when 0x2021: array_enc << 0x87 # 0xe2 0x80 0xa1
-        when 0x02C6: array_enc << 0x88 # 0xcb 0x86
-        when 0x2030: array_enc << 0x89 # 0xe2 0x80 0xb0
-        when 0x0160: array_enc << 0x8A # 0xc5 0xa0
-        when 0x2039: array_enc << 0x8B # 0xe2 0x80 0xb9
-        when 0x0152: array_enc << 0x8C # 0xc5 0x92
-        when 0x017D: array_enc << 0x8E # 0xc5 0xbd
-        when 0x2018: array_enc << 0x91 # 0xe2 0x80 0x98
-        when 0x2019: array_enc << 0x92 # 0xe2 0x80 0x99
-        when 0x201C: array_enc << 0x93 # 0xe2 0x80 0x9c
-        when 0x201D: array_enc << 0x94 # 0xe2 0x80 0x9d
-        when 0x2022: array_enc << 0x95 # 0xe2 0x80 0xa2
-        when 0x2013: array_enc << 0x96 # 0xe2 0x80 0x93
-        when 0x2014: array_enc << 0x97 # 0xe2 0x80 0x94
-        when 0x02DC: array_enc << 0x98 # 0xcb 0x9c
-        when 0x2122: array_enc << 0x99 # 0xe2 0x84 0xa2
-        when 0x0161: array_enc << 0x9A # 0xc5 0xa1
-        when 0x203A: array_enc << 0x9B # 0xe2 0x80 0xba
-        when 0x0152: array_enc << 0x9C # 0xc5 0x93
-        when 0x017E: array_enc << 0x9E # 0xc5 0xbe
-        when 0x0178: array_enc << 0x9F # 0xc5 0xb8
-        else
+    def encode_cp_1252 content
+      enc = ""
+      content.unpack('U*').each do |num|
+        enc << UTF8_CP1252_TABLE.fetch(num) {
           # all remaining basic characters can be used directly
           if num <= 0xFF
-            array_enc << num
+            num
           else
             # Numeric entity (&#nnnn;); shard by  Stefan Scholl
-            array_enc.concat "&\##{num};".unpack('C*')
+            "&\##{num};"
           end
-        end
+        }
       end
-      array_enc.pack('C*')
+      enc
     end
-    
+
     # Convert to UTF-8
-    def decode(str)
-      array_latin9 = str.unpack('C*')
-      array_enc = []
-      array_latin9.each do |num|
-        case num
-          # characters that added compared to iso-8859-1
-        when 0x80: array_enc << 0x20AC # 0xe2 0x82 0xac
-        when 0x82: array_enc << 0x201A # 0xe2 0x82 0x9a
-        when 0x83: array_enc << 0x0192 # 0xc6 0x92
-        when 0x84: array_enc << 0x201E # 0xe2 0x82 0x9e
-        when 0x85: array_enc << 0x2026 # 0xe2 0x80 0xa6
-        when 0x86: array_enc << 0x2020 # 0xe2 0x80 0xa0
-        when 0x87: array_enc << 0x2021 # 0xe2 0x80 0xa1
-        when 0x88: array_enc << 0x02C6 # 0xcb 0x86
-        when 0x89: array_enc << 0x2030 # 0xe2 0x80 0xb0
-        when 0x8A: array_enc << 0x0160 # 0xc5 0xa0
-        when 0x8B: array_enc << 0x2039 # 0xe2 0x80 0xb9
-        when 0x8C: array_enc << 0x0152 # 0xc5 0x92
-        when 0x8E: array_enc << 0x017D # 0xc5 0xbd
-        when 0x91: array_enc << 0x2018 # 0xe2 0x80 0x98
-        when 0x92: array_enc << 0x2019 # 0xe2 0x80 0x99
-        when 0x93: array_enc << 0x201C # 0xe2 0x80 0x9c
-        when 0x94: array_enc << 0x201D # 0xe2 0x80 0x9d
-        when 0x95: array_enc << 0x2022 # 0xe2 0x80 0xa2
-        when 0x96: array_enc << 0x2013 # 0xe2 0x80 0x93
-        when 0x97: array_enc << 0x2014 # 0xe2 0x80 0x94
-        when 0x98: array_enc << 0x02DC # 0xcb 0x9c
-        when 0x99: array_enc << 0x2122 # 0xe2 0x84 0xa2
-        when 0x9A: array_enc << 0x0161 # 0xc5 0xa1
-        when 0x9B: array_enc << 0x203A # 0xe2 0x80 0xba
-        when 0x9C: array_enc << 0x0152 # 0xc5 0x93
-        when 0x9E: array_enc << 0x017E # 0xc5 0xbe
-        when 0x9F: array_enc << 0x0178 # 0xc5 0xb8
-        else
-          array_enc << num
-        end
+    def decode_cp_1252(str)
+      enc = ""
+      str.gsub(/&\#(\d+);/){$1.to_i.chr}.each_byte do |num|
+        enc << CP1252_UTF8_TABLE.fetch(num, num)
+      end
+      enc
+    end
+
+    register("CP-1252") do |obj|
+      class << obj
+        alias decode decode_cp_1252
+        alias encode encode_cp_1252
       end
-      array_enc.pack('U*')
     end
-    ~
   end
 end
Index: lib/rexml/encodings/ISO-8859-15.rb
===================================================================
RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/ISO-8859-15.rb,v
retrieving revision 1.1
diff -U2 -p -r1.1 ISO-8859-15.rb
--- lib/rexml/encodings/ISO-8859-15.rb	16 May 2004 15:19:22 -0000	1.1
+++ lib/rexml/encodings/ISO-8859-15.rb	17 May 2004 02:55:19 -0000
@@ -4,66 +4,58 @@
 module REXML
   module Encoding
-    @@__REXML_encoding_methods = %q~
+    ISO885915_UTF8_TABLE = {
+      0xA4 => 0x20AC, # 0xe2 0x82 0xac
+      0xA6 => 0x0160, # 0xc5 0xa0
+      0xA8 => 0x0161, # 0xc5 0xa1
+      0xB4 => 0x017D, # 0xc5 0xbd
+      0xB8 => 0x017E, # 0xc5 0xbe
+      0xBC => 0x0152, # 0xc5 0x92
+      0xBD => 0x0153, # 0xc5 0x93
+      0xBE => 0x0178, # 0xc5 0xb8
+    }
+    UTF8_ISO885915_TABLE = {
+      # characters removed compared to iso-8859-1
+      0xA4 => '&#164;',
+      0xA6 => '&#166;',
+      0xA8 => '&#168;',
+      0xB4 => '&#180;',
+      0xB8 => '&#184;',
+      0xBC => '&#188;',
+      0xBD => '&#189;',
+      0xBE => '&#190;',
+    }.update(ISO885915_UTF8_TABLE.invert)
+
     # Convert from UTF-8
-    def to_iso_8859_15 content
-      array_utf8 = content.unpack('U*')
-      array_enc = []
-      array_utf8.each do |num|
-        case num
-          # shortcut first bunch basic characters
-        when 0..0xA3: array_enc << num
-          # characters removed compared to iso-8859-1
-        when 0xA4: array_enc << '&#164;'
-        when 0xA6: array_enc << '&#166;'
-        when 0xA8: array_enc << '&#168;'
-        when 0xB4: array_enc << '&#180;'
-        when 0xB8: array_enc << '&#184;'
-        when 0xBC: array_enc << '&#188;'
-        when 0xBD: array_enc << '&#189;'
-        when 0xBE: array_enc << '&#190;'
-          # characters added compared to iso-8859-1
-        when 0x20AC: array_enc << 0xA4 # 0xe2 0x82 0xac
-        when 0x0160: array_enc << 0xA6 # 0xc5 0xa0
-        when 0x0161: array_enc << 0xA8 # 0xc5 0xa1
-        when 0x017D: array_enc << 0xB4 # 0xc5 0xbd
-        when 0x017E: array_enc << 0xB8 # 0xc5 0xbe
-        when 0x0152: array_enc << 0xBC # 0xc5 0x92
-        when 0x0153: array_enc << 0xBD # 0xc5 0x93
-        when 0x0178: array_enc << 0xBE # 0xc5 0xb8
-        else
+    def encode_iso_8859_15 content
+      enc = ""
+      content.unpack('U*').each do |num|
+        enc << UTF8_ISO885915_TABLE.fetch(num) {
           # all remaining basic characters can be used directly
           if num <= 0xFF
-            array_enc << num
+            num
           else
             # Numeric entity (&#nnnn;); shard by  Stefan Scholl
-            array_enc.concat "&\##{num};".unpack('C*')
+            "&\##{num};"
           end
-        end
+        }
       end
-      array_enc.pack('C*')
+      enc
     end
-    
+
     # Convert to UTF-8
-    def from_iso_8859_15(str)
-      array_latin9 = str.unpack('C*')
-      array_enc = []
-      array_latin9.each do |num|
-        case num
-          # characters that differ compared to iso-8859-1
-        when 0xA4: array_enc << 0x20AC
-        when 0xA6: array_enc << 0x0160
-        when 0xA8: array_enc << 0x0161
-        when 0xB4: array_enc << 0x017D
-        when 0xB8: array_enc << 0x017E
-        when 0xBC: array_enc << 0x0152
-        when 0xBD: array_enc << 0x0153
-        when 0xBE: array_enc << 0x0178
-        else
-          array_enc << num
-        end
+    def decode_iso_8859_15(str)
+      enc = ""
+      str.gsub(/&\#(\d+);/){$1.to_i.chr}.each_byte do |num|
+        enc << ISO885915_UTF8_TABLE.fetch(num, num)
+      end
+      enc
+    end
+
+    register("ISO-8859-15") do |obj|
+      class << obj
+        alias decode decode_iso_8859_15
+        alias encode encode_iso_8859_15
       end
-      array_enc.pack('U*')
     end
-    ~
   end
 end
Index: lib/rexml/encodings/SHIFT_JIS.rb
===================================================================
RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/SHIFT_JIS.rb,v
retrieving revision 1.7
diff -U2 -p -r1.7 SHIFT_JIS.rb
--- lib/rexml/encodings/SHIFT_JIS.rb	19 Dec 2004 15:19:43 -0000	1.7
+++ lib/rexml/encodings/SHIFT_JIS.rb	11 Apr 2005 13:26:24 -0000
@@ -1 +1 @@
-load 'rexml/encodings/SHIFT-JIS.rb'
+require 'rexml/encodings/SHIFT-JIS'


-- 
Nobu Nakada

Thread

Prev Next

In This Thread

Prev Next