[#25636] [Oniguruma 3.X] reggnu.c — "K.Kosako" <sndgk393@...>

さっき気がついたのですが、元々は

15 messages 2005/02/05

[#25655] openssl binding for SSL_CTX_set_default_verify_paths and X509_STORE_set_default_paths — Tanaka Akira <akr@...17n.org>

open-uri で https を扱うことを考えていろいろと調べていた所、openssl で、

9 messages 2005/02/08
[#25670] Re: openssl binding for SSL_CTX_set_default_verify_paths and X509_STORE_set_default_paths — GOTOU Yuuzou <gotoyuzo@...> 2005/02/10

In message <876513vce0.fsf@serein.a02.aist.go.jp>,

[#25713] pthread trouble on sighandler — Hidetoshi NAGAI <nagai@...>

永井@知能.九工大です.

17 messages 2005/02/18
[#25714] Re: pthread trouble on sighandler — Yukihiro Matsumoto <matz@...> 2005/02/18

まつもと ゆきひろです

[#25755] I/O operation differs signal handler — Minero Aoki <aamine@...>

青木です。

14 messages 2005/02/24
[#25756] Re: I/O operation differs signal handler — Tanaka Akira <akr@...17n.org> 2005/02/24

In article <20050224091450P.aamine@loveruby.net>,

[ruby-dev:25702] Re: [Oniguruma 3.X] reggnu.c

From: Kazuo Saito <ksaito@...>
Date: 2005-02-15 16:01:21 UTC
List: ruby-dev #25702
斉藤です。

>> この件ですが、もう手をつけられていますか。今体調がよくないので
>> 週末以降にでも、私でよければお手伝いしようかと思っていたのですが。
>
> まだ作業していないので、
> よろしくお願いします。

遅くなりましたが、作成してみました。動作は make test && make test-all、
ruby -w -Ke test.rb で確認しました。
見よう見まねな部分もあるので、内容を確認して頂けると助かります。
大丈夫なようであればコミットします。

     * gc.c, re.c: Ruby now calls the Oniguruma API directly, bypassing
       the GNU-compatible APIs.

Kazuo Saito <ksaito@uranus.dti.ne.jp>

Attachments (1)

oniguruma-api.diff (12.3 KB, text/x-diff)
Index: re.c
===================================================================
--- re.c	(revision 2)
+++ re.c	(working copy)
@@ -208,16 +208,16 @@
     if (reg_kcode == curr_kcode) return;
     switch (curr_kcode) {
       case KCODE_NONE:
-	re_mbcinit(MBCTYPE_ASCII);
+	onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
 	break;
       case KCODE_EUC:
-	re_mbcinit(MBCTYPE_EUC);
+	onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
 	break;
       case KCODE_SJIS:
-	re_mbcinit(MBCTYPE_SJIS);
+	onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
 	break;
       case KCODE_UTF8:
-	re_mbcinit(MBCTYPE_UTF8);
+	onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
 	break;
     }
 }
@@ -228,16 +228,16 @@
     if (reg_kcode == curr_kcode) return;
     switch (reg_kcode) {
       case KCODE_NONE:
-	re_mbcinit(MBCTYPE_ASCII);
+	onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
 	break;
       case KCODE_EUC:
-	re_mbcinit(MBCTYPE_EUC);
+	onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
 	break;
       case KCODE_SJIS:
-	re_mbcinit(MBCTYPE_SJIS);
+	onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
 	break;
       case KCODE_UTF8:
-	re_mbcinit(MBCTYPE_UTF8);
+	onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
 	break;
     }
 }
@@ -335,11 +335,11 @@
     rb_str_buf_cat2(str, "/");
     if (re) {
 	rb_reg_check(re);
-	if (RREGEXP(re)->ptr->options & RE_OPTION_MULTILINE)
+	if (RREGEXP(re)->ptr->options & ONIG_OPTION_MULTILINE)
 	    rb_str_buf_cat2(str, "m");
-	if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE)
+	if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE)
 	    rb_str_buf_cat2(str, "i");
-	if (RREGEXP(re)->ptr->options & RE_OPTION_EXTENDED)
+	if (RREGEXP(re)->ptr->options & ONIG_OPTION_EXTEND)
 	    rb_str_buf_cat2(str, "x");
 
 	if (FL_TEST(re, KCODE_FIXED)) {
@@ -430,7 +430,7 @@
     VALUE re;
 {
     int options;
-    const int embeddable = RE_OPTION_MULTILINE|RE_OPTION_IGNORECASE|RE_OPTION_EXTENDED;
+    const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
     long len;
     const char* ptr;
     VALUE str = rb_str_buf_new2("(?");
@@ -447,13 +447,13 @@
 	if ((len -= 2) > 0) {
 	    do {
 		if (*ptr == 'm') {
-		    options |= RE_OPTION_MULTILINE;
+		    options |= ONIG_OPTION_MULTILINE;
 		}
 		else if (*ptr == 'i') {
-		    options |= RE_OPTION_IGNORECASE;
+		    options |= ONIG_OPTION_IGNORECASE;
 		}
 		else if (*ptr == 'x') {
-		    options |= RE_OPTION_EXTENDED;
+		    options |= ONIG_OPTION_EXTEND;
 		}
 		else break;
 		++ptr;
@@ -464,13 +464,13 @@
 	    --len;
 	    do {
 		if (*ptr == 'm') {
-		    options &= ~RE_OPTION_MULTILINE;
+		    options &= ~ONIG_OPTION_MULTILINE;
 		}
 		else if (*ptr == 'i') {
-		    options &= ~RE_OPTION_IGNORECASE;
+		    options &= ~ONIG_OPTION_IGNORECASE;
 		}
 		else if (*ptr == 'x') {
-		    options &= ~RE_OPTION_EXTENDED;
+		    options &= ~ONIG_OPTION_EXTEND;
 		}
 		else break;
 		++ptr;
@@ -487,10 +487,12 @@
 	    kcode_set_option(re);
 	    r = re_alloc_pattern(&rp);
 	    if (r == 0) {
-		err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0);
+		 ++ptr;
+ 		 len -= 2;
+		 err = (onig_compile(rp, ptr, ptr + len, NULL) != 0);
 	    }
 	    kcode_reset_option();
-	    re_free_pattern(rp);
+	    onig_free(rp);
 	}
 	if (err) {
 	    options = RREGEXP(re)->ptr->options;
@@ -499,15 +501,15 @@
 	}
     }
 
-    if (options & RE_OPTION_MULTILINE) rb_str_buf_cat2(str, "m");
-    if (options & RE_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i");
-    if (options & RE_OPTION_EXTENDED) rb_str_buf_cat2(str, "x");
+    if (options & ONIG_OPTION_MULTILINE) rb_str_buf_cat2(str, "m");
+    if (options & ONIG_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i");
+    if (options & ONIG_OPTION_EXTEND) rb_str_buf_cat2(str, "x");
 
     if ((options & embeddable) != embeddable) {
 	rb_str_buf_cat2(str, "-");
-	if (!(options & RE_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m");
-	if (!(options & RE_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i");
-	if (!(options & RE_OPTION_EXTENDED)) rb_str_buf_cat2(str, "x");
+	if (!(options & ONIG_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m");
+	if (!(options & ONIG_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i");
+	if (!(options & ONIG_OPTION_EXTEND)) rb_str_buf_cat2(str, "x");
     }
 
     rb_str_buf_cat2(str, ":");
@@ -547,7 +549,7 @@
     VALUE re;
 {
     rb_reg_check(re);
-    if (RREGEXP(re)->ptr->options & RE_OPTION_IGNORECASE) return Qtrue;
+    if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue;
     return Qfalse;
 }
 
@@ -626,6 +628,7 @@
     Regexp *rp;
     char err[ONIG_MAX_ERROR_MESSAGE_LEN];
     int r;
+    OnigErrorInfo einfo;
 
     /* Handle escaped characters first. */
 
@@ -636,16 +639,17 @@
 
     r = re_alloc_pattern(&rp);
     if (r) {
-	re_error_code_to_str((UChar* )err, r);
+	onig_error_code_to_str((UChar* )err, r);
 	rb_reg_raise(s, len, err, 0, ce);
     }
 
     if (flags) {
 	rp->options = flags;
     }
-    r = re_compile_pattern(s, len, rp, err);
+    r = onig_compile(rp, (UChar* )s, (UChar* )(s + len), &einfo);
 
     if (r != 0) {
+	(void )onig_error_code_to_str((UChar* )err, r, &einfo);
 	rb_reg_raise(s, len, err, 0, ce);
     }
     return rp;
@@ -694,9 +698,9 @@
 	rb_raise(rb_eTypeError, "wrong argument class");
     }
     RMATCH(obj)->str = RMATCH(orig)->str;
-    re_free_registers(RMATCH(obj)->regs);
+    onig_region_free(RMATCH(obj)->regs, 0);
     RMATCH(obj)->regs->allocated = 0;
-    re_copy_registers(RMATCH(obj)->regs, RMATCH(orig)->regs);
+    onig_region_copy(RMATCH(obj)->regs, RMATCH(orig)->regs);
 
     return obj;
 }
@@ -830,12 +834,12 @@
     /* ignorecase status */
     if (ruby_ignorecase && !state) {
 	FL_SET(re, REG_CASESTATE);
-	RREGEXP(re)->ptr->options |= RE_OPTION_IGNORECASE;
+	RREGEXP(re)->ptr->options |= ONIG_OPTION_IGNORECASE;
 	need_recompile = 1;
     }
     if (!ruby_ignorecase && state) {
 	FL_UNSET(re, REG_CASESTATE);
-	RREGEXP(re)->ptr->options &= ~RE_OPTION_IGNORECASE;
+	RREGEXP(re)->ptr->options &= ~ONIG_OPTION_IGNORECASE;
 	need_recompile = 1;
     }
 
@@ -849,13 +853,22 @@
     if (need_recompile) {
 	char err[ONIG_MAX_ERROR_MESSAGE_LEN];
 	int r;
+	OnigErrorInfo einfo;
+	regex_t *reg;
+	UChar *pattern;
 
 	if (FL_TEST(re, KCODE_FIXED))
 	    kcode_set_option(re);
 	rb_reg_check(re);
-	r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
+	reg = RREGEXP(re)->ptr;
+	pattern = ((UChar* )RREGEXP(re)->str);
+	r = onig_recompile(reg, pattern, pattern + RREGEXP(re)->len,
+			   reg->options, onigenc_get_default_encoding(),
+			   OnigDefaultSyntax, &einfo);
+
 	if (r != 0) {
-	    rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re, Qfalse);
+	     (void )onig_error_code_to_str((UChar* )err, r, &einfo);
+	     rb_reg_raise(pattern, RREGEXP(re)->len, err, re, Qfalse);
 	}
     }
 }
@@ -866,6 +879,8 @@
     long pos, reverse;
 {
     long range;
+    OnigEncoding enc;
+    UChar *p, *string;
 
     rb_reg_check(re);
     if (may_need_recompile) rb_reg_prepare_re(re);
@@ -881,9 +896,22 @@
     else {
 	range = RSTRING(str)->len - pos;
     }
-    return re_adjust_startpos(RREGEXP(re)->ptr,
-			      RSTRING(str)->ptr, RSTRING(str)->len,
-			      pos, range);
+
+    enc = (RREGEXP(re)->ptr)->enc;
+
+    if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING(str)->len) {
+	 string = (UChar* )RSTRING(str)->ptr;
+
+	 if (range > 0) {
+	      p = onigenc_get_right_adjust_char_head(enc, string, string + pos);
+	 }
+	 else {
+	      p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos);
+	 }
+	 return p - string;
+    }
+
+    return pos;
 }
 
 long
@@ -915,9 +943,14 @@
     else {
 	range = RSTRING(str)->len - pos;
     }
-    result = re_search(RREGEXP(re)->ptr,RSTRING(str)->ptr,RSTRING(str)->len,
-		       pos, range, &regs);
 
+    result = onig_search(RREGEXP(re)->ptr,
+			 (UChar* )(RSTRING(str)->ptr),
+			 ((UChar* )(RSTRING(str)->ptr) + RSTRING(str)->len),
+			 ((UChar* )(RSTRING(str)->ptr) + pos),
+			 ((UChar* )(RSTRING(str)->ptr) + pos + range),
+			 &regs, ONIG_OPTION_NONE);
+
     if (FL_TEST(re, KCODE_FIXED))
 	kcode_reset_option();
 
@@ -928,7 +961,7 @@
 	}
 	else {
 	    char err[ONIG_MAX_ERROR_MESSAGE_LEN];
-	    re_error_code_to_str((UChar* )err, result);
+	    onig_error_code_to_str((UChar* )err, result);
 	    rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0, Qfalse);
 	}
     }
@@ -944,7 +977,7 @@
 	    FL_UNSET(match, FL_TAINT);
     }
 
-    re_copy_registers(RMATCH(match)->regs, &regs);
+    onig_region_copy(RMATCH(match)->regs, &regs);
     RMATCH(match)->str = rb_str_new4(str);
     rb_backref_set(match);
 
@@ -1338,7 +1371,7 @@
 {
     struct RRegexp *re = RREGEXP(obj);
 
-    if (re->ptr) re_free_pattern(re->ptr);
+    if (re->ptr) onig_free(re->ptr);
     if (re->str) free(re->str);
     re->ptr = 0;
     re->str = 0;
@@ -1366,7 +1399,7 @@
 	kcode_set_option((VALUE)re);
     }
     if (ruby_ignorecase) {
-	options |= RE_OPTION_IGNORECASE;
+	options |= ONIG_OPTION_IGNORECASE;
 	FL_SET(re, REG_CASESTATE);
     }
     re->ptr = make_regexp(s, len, options & 0xf, ce);
@@ -1734,7 +1767,7 @@
     else {
 	if (argc >= 2) {
 	    if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
-	    else if (RTEST(argv[1])) flags = RE_OPTION_IGNORECASE;
+	    else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
 	}
 	if (argc == 3 && !NIL_P(argv[2])) {
 	    char *kcode = StringValuePtr(argv[2]);
@@ -1924,7 +1957,7 @@
 
     rb_reg_check(re);
     options = RREGEXP(re)->ptr->options &
-	(RE_OPTION_IGNORECASE|RE_OPTION_MULTILINE|RE_OPTION_EXTENDED);
+	(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND);
     if (FL_TEST(re, KCODE_FIXED)) {
 	options |= rb_reg_get_kcode(re);
     }
@@ -2157,17 +2190,17 @@
       case 'E':
       case 'e':
 	reg_kcode = KCODE_EUC;
-	re_mbcinit(MBCTYPE_EUC);
+	onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
 	break;
       case 'S':
       case 's':
 	reg_kcode = KCODE_SJIS;
-	re_mbcinit(MBCTYPE_SJIS);
+	onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
 	break;
       case 'U':
       case 'u':
 	reg_kcode = KCODE_UTF8;
-	re_mbcinit(MBCTYPE_UTF8);
+	onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
 	break;
       default:
       case 'N':
@@ -2176,7 +2209,7 @@
       case 'a':
       set_no_conversion:
 	reg_kcode = KCODE_NONE;
-	re_mbcinit(MBCTYPE_ASCII);
+	onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
 	break;
     }
 }
@@ -2271,17 +2304,17 @@
 {
     rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
 
-    re_set_casetable(casetable);
+    onigenc_set_default_caseconv_table((UChar* )casetable);
 #if DEFAULT_KCODE == KCODE_EUC
-    re_mbcinit(MBCTYPE_EUC);
+    onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP);
 #else
 #if DEFAULT_KCODE == KCODE_SJIS
-    re_mbcinit(MBCTYPE_SJIS);
+    onigenc_set_default_encoding(ONIG_ENCODING_SJIS);
 #else
 #if DEFAULT_KCODE == KCODE_UTF8
-    re_mbcinit(MBCTYPE_UTF8);
+    onigenc_set_default_encoding(ONIG_ENCODING_UTF8);
 #else
-    re_mbcinit(MBCTYPE_ASCII);
+    onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
 #endif
 #endif
 #endif
@@ -2320,9 +2353,9 @@
     rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
     rb_define_method(rb_cRegexp, "kcode", rb_reg_kcode_m, 0);
 
-    rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(RE_OPTION_IGNORECASE));
-    rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(RE_OPTION_EXTENDED));
-    rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(RE_OPTION_MULTILINE));
+    rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
+    rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
+    rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
 
     rb_global_variable(&reg_cache);
 
Index: gc.c
===================================================================
--- gc.c	(revision 2)
+++ gc.c	(working copy)
@@ -52,7 +52,6 @@
 #include <windows.h>
 #endif
 
-void re_free_registers _((struct re_registers*));
 int rb_io_fptr_finalize _((struct OpenFile*));
 
 #if !defined(setjmp) && defined(HAVE__SETJMP)
@@ -1166,7 +1165,7 @@
 	break;
       case T_REGEXP:
 	if (RANY(obj)->as.regexp.ptr) {
-	    re_free_pattern(RANY(obj)->as.regexp.ptr);
+	    onig_free(RANY(obj)->as.regexp.ptr);
 	}
 	if (RANY(obj)->as.regexp.str) {
 	    RUBY_CRITICAL(free(RANY(obj)->as.regexp.str));
@@ -1184,7 +1183,7 @@
 	break;
       case T_MATCH:
 	if (RANY(obj)->as.match.regs) {
-	    re_free_registers(RANY(obj)->as.match.regs);
+	    onig_region_free(RANY(obj)->as.match.regs, 0);
 	    RUBY_CRITICAL(free(RANY(obj)->as.match.regs));
 	}
 	break;

In This Thread