[ruby-list:38387] [PATCH] Kakasi,Chasen

From: MoonWolf <moonwolf@...>
Date: 2003-09-09 12:58:22 UTC
List: ruby-list #38387
MoonWolfです。

KakasiとChasenに、ちょっとしたパッチを作りました。

kakasi-020928.tar.gz

 * 変換後の文字列に汚染が伝播するよう修正

chasen1.6.tar.gz

 * 変換後の文字列に汚染が伝播するよう修正
 * Chasen.split(str)メソッドを追加

    タブ区切りの文字列をパースする。sparseの結果に使うといいかも。

    Chasen.split("123\t456\t\t789\nabc")
      # => [["123", "456", "", "789"], ["abc"]]


Attachments (2)

kakasi.c.patch (932 Bytes, text/x-diff)
--- kakasi.c.orig	2003-09-09 18:44:25.000000000 +0900
+++ kakasi.c	2003-09-09 19:23:09.000000000 +0900
@@ -28,8 +28,11 @@
     Check_Type(src, T_STRING);
 
     /* return "" immediately if source str is empty */
-    if (RSTRING(src)->len == 0)
-	return rb_str_new2("");
+    if (RSTRING(src)->len == 0) {
+	dst = rb_str_new2("");
+        OBJ_INFECT(dst, src);
+        return dst;
+    }
 
     Check_Type(opt, T_STRING);
 
@@ -70,6 +73,7 @@
     }
 
     dst = rb_str_new2("");
+    OBJ_INFECT(dst, src);
     while (i < RSTRING(src)->len) {
       if (*(RSTRING(src)->ptr + i) != '\0') {
 	buf = kakasi_do((RSTRING(src)->ptr + i));
@@ -95,5 +99,5 @@
     VALUE mKakasi = rb_define_module("Kakasi");
 
     rb_define_module_function(mKakasi, "kakasi", rb_kakasi_kakasi, 2);
-    rb_define_const(mKakasi, "KAKASI_VERSION", rb_str_new2("2002-09-28"));
+    rb_define_const(mKakasi, "KAKASI_VERSION", rb_str_new2("2003-09-09"));
 }
chasen.c.patch (1.94 KB, text/x-diff)
--- chasen.c.orig	2003-09-09 19:34:08.000000000 +0900
+++ chasen.c	2003-09-09 21:53:07.000000000 +0900
@@ -39,8 +39,12 @@
 
 static VALUE
 f_chasen_sparse_tostr(VALUE obj, VALUE str){
+   VALUE dst;
+
    Check_SafeStr(str);
-   return rb_str_new2((char*)chasen_sparse_tostr(RSTRING(str)->ptr));
+   dst = rb_str_new2((char*)chasen_sparse_tostr(RSTRING(str)->ptr));
+   OBJ_INFECT(dst, str);
+   return dst;
 }
 
 static VALUE
@@ -57,9 +61,62 @@
    while ((buf = (char*)chasen_fparse_tostr(fp->f)) != NULL) {
       rb_str_cat(dst, buf, strlen(buf));
    }
+   OBJ_INFECT(dst, io);
    return dst;
 }
 
+/*
+ * Chasen.split(src): parse tab-separated text into an array of rows, one
+ * row per '\n'-terminated line, each row an array of '\t'-separated field
+ * strings.  An empty field right before a line end is dropped.  Taint on
+ * src is propagated to the list, its rows and every field string.
+ */
+static VALUE
+f_chasen_split(VALUE obj, VALUE src){
+   VALUE row, field, list;
+   char *ptr, *bottom, *start;
+   char ch;
+
+   Check_SafeStr(src);
+
+   list = rb_ary_new();
+   OBJ_INFECT(list, src);
+   if (RSTRING(src)->len==0) {
+      return list;
+   }
+   start = ptr = RSTRING(src)->ptr;
+   bottom = ptr + (RSTRING(src)->len);
+
+   row = rb_ary_new();
+   OBJ_INFECT(row, src);
+   while(ptr < bottom) {
+      ch = *ptr++;
+      if (ch != '\t' && ch != '\n') continue;
+      /* ptr is one past the delimiter: the field is [start, ptr - 1) */
+      if (ch == '\t' || ptr - 1 > start) {
+         field = rb_str_new(start, ptr - 1 - start);
+         OBJ_INFECT(field, src);
+         rb_ary_push(row, field);
+      }
+      if (ch == '\n') {
+         rb_ary_push(list, row);
+         row = rb_ary_new();
+         OBJ_INFECT(row, src);
+      }
+      start = ptr;
+   }
+   if (ptr > start) {
+      field = rb_str_new(start, ptr - start);
+      OBJ_INFECT(field, src);
+      rb_ary_push(row, field);
+   }
+   /* keep a last row without trailing '\n', even if it ends with a tab */
+   if (RARRAY(row)->len > 0) {
+      rb_ary_push(list, row);
+   }
+   return list;
+}
+
 VALUE mChasen;
 
 void
@@ -68,4 +125,5 @@
    rb_define_module_function(mChasen, "getopt", f_chasen_getopt, -1);
    rb_define_module_function(mChasen, "sparse", f_chasen_sparse_tostr, 1);
    rb_define_module_function(mChasen, "fparse", f_chasen_fparse_tostr, 1);
+   rb_define_module_function(mChasen, "split", f_chasen_split, 1);
 }

In This Thread

Prev Next