[#7102] Ruby 1.3.4-990611 — Yukihiro Matsumoto <matz@...>

Ruby 1.3.4-990611 is out, check out:

20 messages 1999/06/11

[#7223] Ruby 1.3.4-990625 — Yukihiro Matsumoto <matz@...>

Ruby 1.3.4-990625 is out, check out:

14 messages 1999/06/25
[#7224] -Wl,-rpath on Linux (Re: Ruby 1.3.4-990625) — Ryo HAYASAKA <hayasaka@...21.u-aizu.ac.jp> 1999/06/25

早坂@会津大学です。

[ruby-dev:7200] pack/unpack

From: Tadayoshi Funaba <tadf@...>
Date: 1999-06-23 13:18:06 UTC
List: ruby-dev #7200
ふなばです。

pack/unpack で UTF8 がちょっと違うみたいです。
だいたいこんな感じかなあと思うんですが、uv_to_utf8() の最後のあたりがよくわからなかったです。
余計な修正をしてるかも。


--- pack.c.orig	Fri Jun 11 15:29:56 1999
+++ pack.c	Wed Jun 23 22:06:26 1999
@@ -289,8 +289,8 @@
 static void encodes _((VALUE,char*,int,int));
 static void qpencode _((VALUE,VALUE,int));
 
-static long uv_to_utf8 _((char*,long));
-static long utf8_to_uv _((char*,int*));
+static int uv_to_utf8 _((char*,unsigned long));
+static unsigned long utf8_to_uv _((char*,int*));
 
 static void
 pack_add_ptr(str, add)
@@ -790,14 +790,15 @@
 	    while (len-- > 0) {
 		unsigned long l;
 		char buf[8];
+		int le;
 
 		from = NEXTFROM;
 		if (NIL_P(from)) l = 0;
 		else {
 		    l = NUM2ULONG(from);
 		}
-		l = uv_to_utf8(buf, l);
-		rb_str_cat(res, (char*)&buf, l);
+		le = uv_to_utf8(buf, l);
+		rb_str_cat(res, (char*)&buf, le);
 	    }
 	    break;
 
@@ -1399,7 +1400,7 @@
 
 		l = utf8_to_uv(s, &alen);
 		s += alen;
-		rb_ary_push(ary, INT2NUM(l));
+		rb_ary_push(ary, rb_uint2inum(l));
 	    }
 	    break;
 
@@ -1585,42 +1586,42 @@
 
 #define BYTEWIDTH 8
 
-static long
+static int
 uv_to_utf8(buf, uv)
     char *buf;
-    long uv;
+    unsigned long uv;
 {
-    if (uv < 0x80) {
+    if (uv <= 0x7f) {
 	buf[0] = (char)uv;
 	return 1;
     }
-    if (uv < 0x7ff) {
+    if (uv <= 0x7ff) {
 	buf[0] = ((uv>>6)&0xff)|0xc0;
 	buf[1] = uv&0x3f;
 	return 2;
     }
-    if (uv < 0xffff) {
+    if (uv <= 0xffff) {
 	buf[0] = ((uv>>12)&0xff)|0xe0;
 	buf[1] = (uv>>6)&0x3f;
 	buf[2] = uv&0x3f;
 	return 3;
     }
-    if (uv < 0x1fffff) {
+    if (uv <= 0x1fffff) {
 	buf[0] = ((uv>>18)&0xff)|0xf0;
 	buf[1] = (uv>>12)&0x3f;
 	buf[2] = (uv>>6)&0x3f;
 	buf[3] = uv&0x3f;
 	return 4;
     }
-    if (uv < 0x3ffffff) {
-	buf[0] = ((uv>>24)&0xff)|0xf0;
+    if (uv <= 0x3ffffff) {
+	buf[0] = ((uv>>24)&0xff)|0xf8;
 	buf[1] = (uv>>18)&0x3f;
 	buf[2] = (uv>>12)&0x3f;
 	buf[3] = (uv>>6)&0x3f;
 	buf[4] = uv&0x3f;
 	return 5;
     }
-    if (uv < 0x7fffffff) {
+    if (uv <= 0x7fffffff) {
 	buf[0] = ((uv>>30)&0xff)|0xfc;
 	buf[1] = (uv>>24)&0x3f;
 	buf[2] = (uv>>18)&0x3f;
@@ -1629,12 +1630,22 @@
 	buf[5] = uv&0x3f;
 	return 6;
     }
+    /* if (uv <= 0xfffffffff) */ {
+	buf[0] = 0xfe;
+	buf[1] = (uv>>30)&0x3f;
+	buf[2] = (uv>>24)&0x3f;
+	buf[3] = (uv>>18)&0x3f;
+	buf[4] = (uv>>12)&0x3f;
+	buf[5] = (uv>>6)&0x3f;
+	buf[6] = uv&0x3f;
+	return 7;
+    }
     buf[0] = uv>>BYTEWIDTH;
     buf[1] = uv&0xff;
     return 2;
 }
 
-static long
+static unsigned long
 utf8_to_uv(p, lenp)
     char *p;
     int *lenp;
@@ -1649,12 +1660,15 @@
     else if (c < 0xf8) n = 4;
     else if (c < 0xfc) n = 5;
     else if (c < 0xfe) n = 6;
+    else if (c == 0xfe) n = 7;
     *lenp = n--;
 
     uv = c;
-    uv &= (1<<(BYTEWIDTH-2-n)) - 1;
-    while (n--) {
-	uv = uv << 6 | *p++ & ((1<<6)-1);
+    if (n != 0) {
+	uv &= (1<<(BYTEWIDTH-2-n)) - 1;
+	while (n--) {
+	    uv = uv << 6 | *p++ & ((1<<6)-1);
+	}
     }
     return uv;
 }


$><<[[[['J'],[[[['u']]],[['s']]]]],[['t',[[[[' ']],'a']]]],['n'],[[['o']],[
[['t']],'h']],[[['e'],['r']]],[[[[' ']],['R']]],[[[['u']]],['b']],[[[['y'],
[' ']]]],[[['h',[[[[['a']]]],[['c']]],'k']]],[[['e']]],[[[['r']]],[[',']]]]

ふなば ただよし

In This Thread

Prev Next