[ruby-dev:32563] transcoder loading
From:
Nobuyoshi Nakada <nobu@...>
Date:
2007-12-12 19:05:10 UTC
List:
ruby-dev #32563
なかだです。
transcoderを拡張ライブラリに出してみました。ext/とは別にしたほう
がいいかもしれませんが。
include/ruby/transcode.hとext/enc/trans/iso_8859/iso_8859.cは、
それぞれtranscode_data.hとtranscode_data_iso_8859.cをsvn mv した
ものです。
Index: common.mk
===================================================================
--- common.mk (revision 14198)
+++ common.mk (working copy)
@@ -25,4 +25,6 @@ ENCOBJS = ascii.$(OBJEXT) \
utf8.$(OBJEXT)
+TRANSCODEOBJS = iso_8859.$(OBJEXT)
+
COMMONOBJS = array.$(OBJEXT) \
bignum.$(OBJEXT) \
@@ -67,5 +69,4 @@ COMMONOBJS = array.$(OBJEXT) \
time.$(OBJEXT) \
transcode.$(OBJEXT) \
- transcode_data_iso_8859.$(OBJEXT) \
util.$(OBJEXT) \
variable.$(OBJEXT) \
@@ -544,7 +545,6 @@ thread.$(OBJEXT): {$(VPATH)}thread.c {$(
{$(VPATH)}node.h {$(VPATH)}util.h \
{$(VPATH)}signal.h {$(VPATH)}st.h {$(VPATH)}dln.h
-transcode.$(OBJEXT): {$(VPATH)}transcode.c {$(VPATH)}transcode_data.h {$(VPATH)}ruby.h {$(VPATH)}config.h \
+transcode.$(OBJEXT): {$(VPATH)}transcode.c {$(VPATH)}transcode.h {$(VPATH)}ruby.h {$(VPATH)}config.h \
{$(VPATH)}defines.h {$(VPATH)}intern.h {$(VPATH)}missing.h {$(VPATH)}encoding.h
-transcode_data_iso_8859.$(OBJEXT): {$(VPATH)}transcode_data_iso_8859.c {$(VPATH)}transcode_data.h
cont.$(OBJEXT): {$(VPATH)}cont.c {$(VPATH)}eval_intern.h \
{$(VPATH)}ruby.h {$(VPATH)}vm_core.h {$(VPATH)}id.h {$(VPATH)}config.h \
Index: transcode.c
===================================================================
--- transcode.c (revision 14198)
+++ transcode.c (working copy)
@@ -13,7 +13,7 @@
#include "ruby/ruby.h"
#include "ruby/encoding.h"
-
-#include "transcode_data.h"
-
+#define PType (int)
+#include "ruby/transcode.h"
+#include <ctype.h>
VALUE rb_str_tmp_new(long);
@@ -24,130 +24,62 @@ VALUE rb_str_shared_replace(VALUE, VALUE
*/
-/* extern declarations, should use some include file here */
-extern const BYTE_LOOKUP from_ISO_8859_1;
-extern const BYTE_LOOKUP from_ISO_8859_2;
-extern const BYTE_LOOKUP from_ISO_8859_3;
-extern const BYTE_LOOKUP from_ISO_8859_4;
-extern const BYTE_LOOKUP from_ISO_8859_5;
-extern const BYTE_LOOKUP from_ISO_8859_6;
-extern const BYTE_LOOKUP from_ISO_8859_7;
-extern const BYTE_LOOKUP from_ISO_8859_8;
-extern const BYTE_LOOKUP from_ISO_8859_9;
-extern const BYTE_LOOKUP from_ISO_8859_10;
-extern const BYTE_LOOKUP from_ISO_8859_11;
-extern const BYTE_LOOKUP from_ISO_8859_13;
-extern const BYTE_LOOKUP from_ISO_8859_14;
-extern const BYTE_LOOKUP from_ISO_8859_15;
-
-extern const BYTE_LOOKUP to_ISO_8859_1;
-extern const BYTE_LOOKUP to_ISO_8859_2;
-extern const BYTE_LOOKUP to_ISO_8859_3;
-extern const BYTE_LOOKUP to_ISO_8859_4;
-extern const BYTE_LOOKUP to_ISO_8859_5;
-extern const BYTE_LOOKUP to_ISO_8859_6;
-extern const BYTE_LOOKUP to_ISO_8859_7;
-extern const BYTE_LOOKUP to_ISO_8859_8;
-extern const BYTE_LOOKUP to_ISO_8859_9;
-extern const BYTE_LOOKUP to_ISO_8859_10;
-extern const BYTE_LOOKUP to_ISO_8859_11;
-extern const BYTE_LOOKUP to_ISO_8859_13;
-extern const BYTE_LOOKUP to_ISO_8859_14;
-extern const BYTE_LOOKUP to_ISO_8859_15;
-
-
-/* declarations probably need to go into separate header file, e.g. transcode.h */
-
-/* static structure, one per supported encoding pair */
-typedef struct {
- const char *from_encoding;
- const char *to_encoding;
- const BYTE_LOOKUP *conv_tree_start;
- int max_output;
- int from_utf8;
-} transcoder;
-
-/* todo: dynamic structure, one per conversion (stream) */
-
-/* in the future, add some mechanism for dynamically adding stuff here */
-#define MAX_TRANSCODERS 29 /* todo: fix: this number has to be adjusted by hand */
-static transcoder transcoder_table[MAX_TRANSCODERS];
+static st_table *transcoder_table;
+
+#define TRANSCODER_SEP '@'
/* not sure why it's not possible to do relocatable initializations */
/* maybe the code here can be removed (changed to simple initialization) */
/* if we move this to another file???? */
-static void
-register_transcoder(const char *from_e, const char *to_e,
- const BYTE_LOOKUP *tree_start, int max_output, int from_utf8)
-{
- static int n = 0;
- if (n >= MAX_TRANSCODERS) {
- /* we are initializing, is it okay to use rb_raise here? */
- rb_raise(rb_eRuntimeError /*change exception*/, "not enough transcoder slots");
- }
- transcoder_table[n].from_encoding = from_e;
- transcoder_table[n].to_encoding = to_e;
- transcoder_table[n].conv_tree_start = tree_start;
- transcoder_table[n].max_output = max_output;
- transcoder_table[n].from_utf8 = from_utf8;
-
- n++;
-}
-
-static void
-init_transcoder_table(void)
+void
+rb_register_transcoder(const char *from_e, const char *to_e,
+ rb_transcode_loop_func *func, const void *arg, int max_output)
{
- register_transcoder("ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0);
- register_transcoder("ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0);
- register_transcoder("ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0);
- register_transcoder("ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0);
- register_transcoder("ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0);
- register_transcoder("ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0);
- register_transcoder("ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0);
- register_transcoder("ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0);
- register_transcoder("ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0);
- register_transcoder("ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0);
- register_transcoder("ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0);
- register_transcoder("ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0);
- register_transcoder("ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0);
- register_transcoder("ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0);
- register_transcoder("UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1);
- register_transcoder("UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1);
- register_transcoder(NULL, NULL, NULL, 0, 0);
+ long from_len = strlen(from_e);
+ long to_len = strlen(to_e);
+ char *const key = xmalloc(from_len + to_len + 2); /* "<from_e>@<to_e>" plus the NUL terminator */
+ st_data_t val = 0;
+ rb_transcoder *my_transcoder;
+
+ memcpy(key, from_e, from_len);
+ memcpy(key + from_len + 1, to_e, to_len + 1); /* to_len + 1 also copies to_e's NUL */
+ key[from_len] = TRANSCODER_SEP;
+ if (st_lookup(transcoder_table, (st_data_t)key, &val)) { /* refuse to register the same pair twice */
+ xfree(key);
+ rb_raise(rb_eArgError, "transcoder has been already registered - %s%c%s", from_e, TRANSCODER_SEP, to_e); /* key was just freed, so format the name from the arguments */
+ }
+ my_transcoder = ALLOC(rb_transcoder);
+ my_transcoder->transcode_loop = func;
+ my_transcoder->transcode_arg = (void *)arg;
+ my_transcoder->max_output = max_output;
+ st_insert(transcoder_table, (st_data_t)key, (st_data_t)my_transcoder); /* key is stored in the table and not freed here */
}
-
-static transcoder*
+static rb_transcoder *
transcode_dispatch(const char* from_encoding, const char* to_encoding)
{
- transcoder *candidate = transcoder_table;
-
- for (candidate = transcoder_table; candidate->from_encoding; candidate++)
- if (0==strcasecmp(from_encoding, candidate->from_encoding)
- && 0==strcasecmp(to_encoding, candidate->to_encoding))
- break;
- /* in the future, add multistep transcoding logic here */
- return candidate->from_encoding ? candidate : NULL;
+ static const char prefix[] = "enc/trans/"; /* prepended to the key to form the rb_require() path */
+ long from_len = strlen(from_encoding);
+ long to_len = strlen(to_encoding);
+ char *const path = alloca(from_len + to_len + sizeof(prefix) + 1); /* prefix + from + separator + to + NUL */
+ char *const key = path + sizeof(prefix) - 1; /* key is the tail of path, right after the prefix slot */
+ st_data_t val = 0;
+ char *s;
+
+ memcpy(key, from_encoding, from_len);
+ memcpy(key + from_len + 1, to_encoding, to_len + 1); /* to_len + 1 also copies the NUL */
+ key[from_len] = TRANSCODER_SEP;
+ for (s = key; *s; ++s) { /* lower-case the whole key in place */
+ if (ISUPPER(*s)) *s = tolower(*s);
+ }
+ if (!st_lookup(transcoder_table, (st_data_t)key, &val)) { /* not registered yet: try to load it */
+ memcpy(path, prefix, sizeof(prefix) - 1); /* prefix is filled in only on this path; its NUL is not copied */
+ if (!rb_require(path)) return 0; /* path is now "enc/trans/<from>@<to>" in lower case */
+ if (!st_lookup(transcoder_table, (st_data_t)key, &val)) { /* the loaded file is expected to have registered the key */
+ return 0;
+ }
+ }
+ return (rb_transcoder *)val;
}
-/* dynamic structure, one per conversion (similar to iconv_t) */
-/* may carry conversion state (e.g. for iso-2022-jp) */
-typedef struct transcoding {
- VALUE ruby_string_dest; /* the String used as the conversion destination,
- or NULL if something else is being converted */
- char *(*flush_func)(struct transcoding*, int, int);
-} transcoding;
-
/*
@@ -157,14 +89,14 @@ static void
transcode_loop(char **in_pos, char **out_pos,
char *in_stop, char *out_stop,
- transcoder *my_transcoder,
- transcoding *my_transcoding)
+ const rb_transcoder *my_transcoder,
+ int from_utf8,
+ rb_transcoding *my_transcoding)
{
char *in_p = *in_pos, *out_p = *out_pos;
- const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
+ const BYTE_LOOKUP *conv_tree_start = my_transcoder->transcode_arg;
const BYTE_LOOKUP *next_table;
unsigned int next_offset;
VALUE next_info;
unsigned char next_byte;
- int from_utf8 = my_transcoder->from_utf8;
char *out_s = out_stop - my_transcoder->max_output + 1;
while (in_p < in_stop) {
@@ -238,4 +170,24 @@ transcode_loop(char **in_pos, char **out
}
+void
+rb_transcode_loop_from_utf8(char **in_pos, char **out_pos,
+ char *in_stop, char *out_stop,
+ const rb_transcoder *my_transcoder,
+ rb_transcoding *my_transcoding)
+{
+ transcode_loop(in_pos, out_pos, in_stop, out_stop,
+ my_transcoder, 1, my_transcoding); /* 1 is transcode_loop()'s from_utf8 argument */
+}
+
+void
+rb_transcode_loop_simple(char **in_pos, char **out_pos,
+ char *in_stop, char *out_stop,
+ const rb_transcoder *my_transcoder,
+ rb_transcoding *my_transcoding)
+{
+ transcode_loop(in_pos, out_pos, in_stop, out_stop,
+ my_transcoder, 0, my_transcoding); /* 0 is transcode_loop()'s from_utf8 argument */
+}
+
/*
@@ -244,5 +196,5 @@ transcode_loop(char **in_pos, char **out
static char *
-str_transcoding_resize(transcoding *my_transcoding, int len, int new_len)
+str_transcoding_resize(rb_transcoding *my_transcoding, int len, int new_len)
{
VALUE dest_string = my_transcoding->ruby_string_dest;
@@ -261,6 +213,6 @@ str_transcode(int argc, VALUE *argv, VAL
int from_encidx, to_encidx;
VALUE from_encval, to_encval;
- transcoder *my_transcoder;
- transcoding my_transcoding;
+ rb_transcoder *my_transcoder;
+ rb_transcoding my_transcoding;
if (argc<1 || argc>2) {
@@ -312,5 +264,6 @@ str_transcode(int argc, VALUE *argv, VAL
/* for simple testing: */
- transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding);
+ my_transcoder->transcode_loop(&fromp, &bp, (sp+slen), (bp+blen),
+ my_transcoder, &my_transcoding); /* loop functions take the rb_transcoder itself and read transcode_arg from it */
if (fromp != sp+slen) {
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
@@ -375,5 +328,5 @@ void
Init_transcode(void)
{
- init_transcoder_table();
+ transcoder_table = st_init_strcasetable();
rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
Index: include/ruby/transcode.h
===================================================================
--- include/ruby/transcode.h (revision 14198)
+++ include/ruby/transcode.h (working copy)
@@ -1,2 +1,5 @@
+#ifndef RUBY_TRANSCODE_H
+#define RUBY_TRANSCODE_H
+
typedef unsigned char base_element;
@@ -6,10 +9,15 @@ typedef struct byte_lookup {
} BYTE_LOOKUP;
-#ifdef TRANSCODE_DATA
+/* dynamic structure, one per conversion (similar to iconv_t) */
+/* may carry conversion state (e.g. for iso-2022-jp) */
+typedef struct rb_transcoding {
+ VALUE ruby_string_dest; /* the String used as the conversion destination,
+ or NULL if something else is being converted */
+ char *(*flush_func)(struct rb_transcoding*, int, int);
+} rb_transcoding;
+
+#ifndef PType
/* data file needs to treat this as a pointer, to remove warnings */
#define PType (const BYTE_LOOKUP *)
-#else
-/* in code, this is treated as just an integer */
-#define PType (int)
#endif
@@ -38,2 +46,23 @@ typedef struct byte_lookup {
#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
+/* static structure, one per supported encoding pair */
+typedef struct rb_transcoder rb_transcoder;
+
+typedef void rb_transcode_loop_func(char **, char **, char *, char *,
+ const struct rb_transcoder *, rb_transcoding *);
+
+struct rb_transcoder {
+ rb_transcode_loop_func *transcode_loop; /* conversion routine invoked by str_transcode() */
+ void *transcode_arg; /* opaque data for the loop; transcode_loop() reads it as a BYTE_LOOKUP tree */
+ int max_output; /* transcode_loop() uses this to compute its out_stop headroom */
+};
+
+void rb_transcode_loop_from_utf8(char **, char **, char *, char *,
+ const rb_transcoder *, rb_transcoding *);
+void rb_transcode_loop_simple(char **, char **, char *, char *,
+ const rb_transcoder *, rb_transcoding *);
+
+void rb_register_transcoder(const char *from_e, const char *to_e,
+ rb_transcode_loop_func *func, const void *arg,
+ int max_output);
+#endif
Index: ext/enc/trans/iso_8859/lib/iso-8859-1@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-1@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-1@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-1.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-1.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-1.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-2@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-2@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-2@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-2.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-2.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-2.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-3@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-3@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-3@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-3.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-3.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-3.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-4@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-4@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-4@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-4.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-4.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-4.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-5@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-5@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-5@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-5.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-5.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-5.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-6@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-6@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-6@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-6.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-6.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-6.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-7@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-7@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-7@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-7.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-7.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-7.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-8@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-8@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-8@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-9.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-9.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-9.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-9@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-9@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-9@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-10.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-10.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-10.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-10@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-10@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-10@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-11.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-11.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-11.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-11@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-11@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-11@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-12@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-12@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-12@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-12.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-12.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-12.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-13@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-13@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-13@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-13.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-13.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-13.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-14.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-14.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-14.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-14@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-14@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-14@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/utf-8@iso-8859-15.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/utf-8@iso-8859-15.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/utf-8@iso-8859-15.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/lib/iso-8859-15@utf-8.rb
===================================================================
--- ext/enc/trans/iso_8859/lib/iso-8859-15@utf-8.rb (revision 0)
+++ ext/enc/trans/iso_8859/lib/iso-8859-15@utf-8.rb (revision 0)
@@ -0,0 +1 @@
+require "enc/trans/iso_8859"
Index: ext/enc/trans/iso_8859/iso_8859.c
===================================================================
--- ext/enc/trans/iso_8859/iso_8859.c (revision 0)
+++ ext/enc/trans/iso_8859/iso_8859.c (working copy)
@@ -1,4 +1,9 @@
-#define TRANSCODE_DATA
-#include "transcode_data.h"
+#include "ruby.h"
+#include "ruby/transcode.h"
+
+typedef struct {
+ const BYTE_LOOKUP *conv_tree_start;
+ int max_output;
+} iso_8859_transcoder; /* NOTE(review): not referenced in any hunk shown here - confirm it is still needed */
static const unsigned char
@@ -89,5 +94,5 @@ from_ISO_8859_1_infos[129] = {
output2('\xC3','\xBF'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_1 = {
from_ISO_8859_1_offsets,
@@ -182,5 +187,5 @@ to_ISO_8859_1_infos[3] = {
NOMAP, &to_ISO_8859_1_C2, &to_ISO_8859_1_C3,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_1 = {
to_ISO_8859_1_offsets,
@@ -275,5 +280,5 @@ from_ISO_8859_2_infos[129] = {
output2('\xCB','\x99'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_2 = {
from_ISO_8859_2_offsets,
@@ -418,5 +423,5 @@ to_ISO_8859_2_infos[6] = {
&to_ISO_8859_2_C5, &to_ISO_8859_2_CB,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_2 = {
to_ISO_8859_2_offsets,
@@ -507,5 +512,5 @@ from_ISO_8859_3_infos[122] = {
output2('\xC5','\x9D'), output2('\xCB','\x99'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_3 = {
from_ISO_8859_3_offsets,
@@ -649,5 +654,5 @@ to_ISO_8859_3_infos[6] = {
&to_ISO_8859_3_C5, &to_ISO_8859_3_CB,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_3 = {
to_ISO_8859_3_offsets,
@@ -742,5 +747,5 @@ from_ISO_8859_4_infos[129] = {
output2('\xCB','\x99'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_4 = {
from_ISO_8859_4_offsets,
@@ -886,5 +891,5 @@ to_ISO_8859_4_infos[6] = {
&to_ISO_8859_4_C5, &to_ISO_8859_4_CB,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_4 = {
to_ISO_8859_4_offsets,
@@ -979,5 +984,5 @@ from_ISO_8859_5_infos[129] = {
output2('\xD1','\x9F'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_5 = {
from_ISO_8859_5_offsets,
@@ -1124,5 +1129,5 @@ to_ISO_8859_5_infos[5] = {
&to_ISO_8859_5_E2,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_5 = {
to_ISO_8859_5_offsets,
@@ -1194,5 +1199,5 @@ from_ISO_8859_6_infos[84] = {
output2('\xD9','\x91'), output2('\xD9','\x92'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_6 = {
from_ISO_8859_6_offsets,
@@ -1293,5 +1298,5 @@ to_ISO_8859_6_infos[4] = {
NOMAP, &to_ISO_8859_6_C2, &to_ISO_8859_6_D8, &to_ISO_8859_6_D9,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_6 = {
to_ISO_8859_6_offsets,
@@ -1384,5 +1389,5 @@ from_ISO_8859_7_infos[126] = {
output2('\xCF','\x8D'), output2('\xCF','\x8E'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_7 = {
from_ISO_8859_7_offsets,
@@ -1560,5 +1565,5 @@ to_ISO_8859_7_infos[6] = {
&to_ISO_8859_7_CF, &to_ISO_8859_7_E2,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_7 = {
to_ISO_8859_7_offsets,
@@ -1635,5 +1640,5 @@ from_ISO_8859_8_infos[93] = {
output3('\xE2','\x80','\x8F'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_8 = {
from_ISO_8859_8_offsets,
@@ -1770,5 +1775,5 @@ to_ISO_8859_8_infos[5] = {
&to_ISO_8859_8_E2,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_8 = {
to_ISO_8859_8_offsets,
@@ -1863,5 +1868,5 @@ from_ISO_8859_9_infos[129] = {
output2('\xC3','\xBF'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_9 = {
from_ISO_8859_9_offsets,
@@ -1990,5 +1995,5 @@ to_ISO_8859_9_infos[5] = {
&to_ISO_8859_9_C5,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_9 = {
to_ISO_8859_9_offsets,
@@ -2083,5 +2088,5 @@ from_ISO_8859_10_infos[129] = {
output2('\xC4','\xB8'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_10 = {
from_ISO_8859_10_offsets,
@@ -2245,5 +2250,5 @@ to_ISO_8859_10_infos[6] = {
&to_ISO_8859_10_C5, &to_ISO_8859_10_E2,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_10 = {
to_ISO_8859_10_offsets,
@@ -2334,5 +2339,5 @@ from_ISO_8859_11_infos[121] = {
output3('\xE0','\xB9','\x9B'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_11 = {
from_ISO_8859_11_offsets,
@@ -2459,5 +2464,5 @@ to_ISO_8859_11_infos[3] = {
NOMAP, &to_ISO_8859_11_C2, &to_ISO_8859_11_E0,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_11 = {
to_ISO_8859_11_offsets,
@@ -2552,5 +2557,5 @@ from_ISO_8859_13_infos[129] = {
output3('\xE2','\x80','\x99'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_13 = {
from_ISO_8859_13_offsets,
@@ -2712,5 +2717,5 @@ to_ISO_8859_13_infos[6] = {
&to_ISO_8859_13_C5, &to_ISO_8859_13_E2,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_13 = {
to_ISO_8859_13_offsets,
@@ -2805,5 +2810,5 @@ from_ISO_8859_14_infos[129] = {
output2('\xC3','\xBF'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_14 = {
from_ISO_8859_14_offsets,
@@ -3016,5 +3021,5 @@ to_ISO_8859_14_infos[6] = {
&to_ISO_8859_14_C5, &to_ISO_8859_14_E1,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_14 = {
to_ISO_8859_14_offsets,
@@ -3109,5 +3114,5 @@ from_ISO_8859_15_infos[129] = {
output2('\xC3','\xBF'),
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
from_ISO_8859_15 = {
from_ISO_8859_15_offsets,
@@ -3253,5 +3258,5 @@ to_ISO_8859_15_infos[5] = {
&to_ISO_8859_15_E2,
};
-const BYTE_LOOKUP
+static const BYTE_LOOKUP
to_ISO_8859_15 = {
to_ISO_8859_15_offsets,
@@ -3259,2 +3264,40 @@ to_ISO_8859_15 = {
};
+#define register_transcoder(from_e, to_e, tree_start, max_output, from_utf8) /* picks the loop function from from_utf8 */ \
+ rb_register_transcoder((from_e), (to_e), \
+ ((from_utf8) ? rb_transcode_loop_from_utf8 : \
+ rb_transcode_loop_simple), \
+ (tree_start), (max_output))
+
+void
+Init_iso_8859(void)
+{
+ register_transcoder("ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0); /* args: from, to, lookup tree, max_output, from_utf8 */
+ register_transcoder("ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0);
+ register_transcoder("ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0);
+ register_transcoder("ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0);
+ register_transcoder("ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0);
+ register_transcoder("ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0);
+ register_transcoder("ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0);
+ register_transcoder("ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0);
+ register_transcoder("ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0);
+ register_transcoder("ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0);
+ register_transcoder("ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0);
+ register_transcoder("ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0);
+ register_transcoder("ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0);
+ register_transcoder("ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0);
+ register_transcoder("UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1); /* from here on: UTF-8 source, from_utf8 = 1 */
+ register_transcoder("UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1);
+ register_transcoder("UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1);
+}
Index: ext/enc/trans/iso_8859/extconf.rb
===================================================================
--- ext/enc/trans/iso_8859/extconf.rb (revision 0)
+++ ext/enc/trans/iso_8859/extconf.rb (revision 0)
@@ -0,0 +1 @@
+create_makefile("enc/trans/iso_8859")
--
--- 僕の前にBugはない。
--- 僕の後ろにBugはできる。
中田 伸悦