[ruby-dev:37203] Re: m17n of irb
From:
"Yugui (Yuki Sonoda)" <yugui@...>
Date:
2008-11-26 11:40:31 UTC
List:
ruby-dev #37203
keiju ISHITSUKA さんは書きました: > うーん. しょうがないんですかねぇ... > > それに, この問題はirbに限定されるわけではなく, 外部からの文字列をeval > するようなプログラムすべてに当てはまります. そんなに多いとは言いません > が, それなりにあることは確かで, 今私が開発している別の処理系でも同じ問 > 題があります. irbがある種のRuby処理系だから発生する問題であってあまり一般的ではないと 思っていました。あまり一般的な問題であっては欲しくないんですけどねー。 > default_internal, default_externalが再設定できないのは, 2度設定される > とどれが正しいのか分からなくなるからとのことのようですが, 起動時オプショ > ンや環境変数RUBYOPTで-Eで指定されていなかったら, 一度だけなら代入が許 > されても良いと思うのですが, いかがでしょうか? 既にdefault_internal, default_externalで開かれたリソースが存在する可能性 があると思います。ですから、変更するならばそのリソースに対して責任を負う 必要があって、あまり気軽に変更して欲しくはないと思います。 言語デザインとしてどうでしょうか? < まつもとさん 言い換えると、「一度だけ代入可」の選択肢はあり得ますか? あるんだったら本件は割と簡単になりますよね。 > あとは, IRB.parse_opt の話で気が付きましたが, default_*を設定したくな > るのは, アプリケーションの初期化時点ですので, lib/optperse.rb 辺りに組 > み込んでしまうというのもありのような気がします. ただ, こちらの場合でも > 一部拡張ライブラリ化は必要になることになりますが, irb限定の対応ではな > いので, 利用価値は高くなると思います. ちょっとこの話はよく分からないので、風邪が治ったら考えさせてください。 > あと, default_src_encodingって何でしょうか? ML等では見掛けませんが? ruby.cでいうopt->src.encの意図でした。-Kの時に設定される、magic comment がないときのscript encodingです。-Kのとき以外は常にUS-ASCIIです。 それはそうと、IRB.parse_optsの拡張ライブラリ化を書いちゃったのでとりあえ ず添付しますね。ただ、このパッチではdefault_internalが設定できません。 rb_enc_set_default_internalが1度しか設定を許さないようになっていて、Ruby 本体のオプション解析の時点で設定されるからです。 この辺りも含めて、デフォルトのエンコーディング設定の変更を許すことが言語 デザイン的にあり得るのかどうかまつもとさんに伺いたいです。 私は結構現状に納得してます。 -- Yugui <yugui@yugui.jp> http://yugui.jp 私は私をDumpする
Attachments (1)
irb-m17n-option.diff
(18 KB, text/x-diff)
diff --git a/ext/irb/option/extconf.rb b/ext/irb/option/extconf.rb
new file mode 100644
index 0000000..f5f5d9f
--- /dev/null
+++ b/ext/irb/option/extconf.rb
@@ -0,0 +1,3 @@
+# Build script for the irb/option C extension: generates its Makefile via mkmf.
+require 'mkmf'
+target = "irb/option"
+create_makefile(target)
diff --git a/ext/irb/option/option.c b/ext/irb/option/option.c
new file mode 100644
index 0000000..642950c
--- /dev/null
+++ b/ext/irb/option/option.c
@@ -0,0 +1,388 @@
+#include <ruby/ruby.h>
+#include <ruby/intern.h>
+#include <ruby/encoding.h>
+#include "../../../vm_core.h"
+
+/* The IRB module object; assigned in Init_option(). */
+static VALUE irb_cIRB = Qnil;
+/*
+ * Values collected while ARGV is scanned. The option handlers fill this
+ * structure in, and irb_s_parse_opts() applies the collected values once
+ * scanning has finished.
+ */
+struct command_line_options {
+ /* the IRB @CONF hash that most handlers write into */
+ VALUE conf;
+ /* requested default internal encoding name (String), Qnil until set */
+ VALUE default_internal;
+ /* requested default external encoding name (String), Qnil until set */
+ VALUE default_external;
+ /* requested source encoding name (String), Qnil until set */
+ VALUE source_encoding;
+ /* Array of directories gathered from -I */
+ VALUE load_path;
+ /* first non-option element of ARGV, Qnil when there is none */
+ VALUE script_name;
+};
+/*
+ * Initializer for struct command_line_options. Every field starts as Qnil
+ * except load_path, which starts as a freshly allocated empty Array.
+ * The entries are positional, so they must follow the field order of the
+ * struct declaration.
+ */
+#define COMMAND_LINE_OPTIONS_INITIALIZER {\
+ Qnil, \
+ Qnil, \
+ Qnil, \
+ Qnil, \
+ rb_ary_new(), \
+ Qnil \
+}
+
+/* Interns the C string `str` and wraps the resulting ID as a Symbol VALUE. */
+#define CSTR2SYM(str) ID2SYM(rb_intern(str))
+
+
+/* Helpers that record an encoding name in the options struct; defined at the end of this file. */
+static void set_external_once(struct command_line_options *result, VALUE name);
+static void set_internal_once(struct command_line_options *result, VALUE name);
+static void set_source_encoding_once(struct command_line_options *result, VALUE name);
+
+/* Option handlers referenced from option_tbl; each has the handler signature declared in struct option_tbl_entry. */
+static void set_encoding(struct command_line_options *opt, VALUE unused, void *data);
+static void set_kcode(struct command_line_options *result, VALUE unused, void *data);
+static void set_utf8(struct command_line_options *result, VALUE unused0, void *unused1);
+static void just_assign(struct command_line_options *opt, VALUE key, void *data);
+static void set_symbol(struct command_line_options *opt, VALUE key, void *data);
+static void set_numeric(struct command_line_options *opt, VALUE key, void *data);
+static void set_prompt_mode(struct command_line_options *opt, VALUE key, void *data);
+static void set_load_module(struct command_line_options *opt, VALUE key, void *data);
+static void set_debug(struct command_line_options *unused0, VALUE unused1, void *data);
+static void set_load_path(struct command_line_options *result, VALUE unused, void *data);
+static void print_version(struct command_line_options *unused0, VALUE unused1, void *data);
+static void print_help(struct command_line_options *unused0, VALUE unused1, void *data);
+
+/* One row of the option table: how a single command line option is recognized and handled. */
+struct option_tbl_entry {
+ const char *opt_name; /* option name without its first '-' (long options therefore keep one leading '-') */
+ const char *conf_name; /* corresponding key in IRB.conf, or 0 when the option has none */
+ int has_arg; /* 1: the option takes an argument, 0: it does not */
+ void (*handler)(struct command_line_options *result, VALUE key, void *data); /* option handler */
+ void *data; /* user-defined data passed to the handler when has_arg is 0; when has_arg is 1 the handler receives the argument String instead */
+};
+/*
+ * Table of the options understood by IRB.parse_opts; process_opt() scans it
+ * from top to bottom. Columns: option name (first '-' removed), IRB.conf key,
+ * whether the option takes an argument, handler, and the fixed data handed to
+ * the handler of an argument-less option.
+ */
+static struct option_tbl_entry
+option_tbl[] = {
+ {"d", 0, 0, set_debug, (void*)Qtrue },
+ {"f", "RC", 0, just_assign, (void*)Qfalse },
+ {"m", "MATH_MODE", 0, just_assign, (void*)Qtrue },
+ {"r", "LOAD_MODULES", 1, set_load_module, 0 },
+ {"I", 0, 1, set_load_path, 0 },
+ {"K", 0, 1, set_kcode, 0 },
+ {"E", 0, 1, set_encoding, 0 },
+ {"-encoding", 0, 1, set_encoding, 0 },
+ {"U", 0, 0, set_utf8, 0 },
+ {"h", 0, 0, print_help, 0 },
+ {"v", 0, 0, print_version, 0 },
+ {"-inspect", "INSPECT_MODE", 0, just_assign, (void*)Qtrue },
+ {"-noinspect", "INSPECT_MODE", 0, just_assign, (void*)Qfalse },
+ {"-readline", "USE_READLINE", 0, just_assign, (void*)Qtrue },
+ {"-noreadline", "USE_READLINE", 0, just_assign, (void*)Qfalse },
+ {"-echo", "ECHO", 0, just_assign, (void*)Qtrue },
+ {"-noecho", "ECHO", 0, just_assign, (void*)Qfalse },
+ {"-verbose", "VERBOSE", 0, just_assign, (void*)Qtrue },
+ {"-noverbose", "VERBOSE", 0, just_assign, (void*)Qfalse },
+ {"-prompt-mode", "PROMPT_MODE", 1, set_prompt_mode, 0 },
+ {"-prompt", "PROMPT_MODE", 1, set_prompt_mode, 0 },
+ {"-noprompt", "PROMPT_MODE", 0, set_symbol, "NULL" },
+ {"-inf-ruby-mode", "PROMPT_MODE", 0, set_symbol, "INF_RUBY" },
+ {"-sample-book-mode", "PROMPT_MODE", 0, set_symbol, "SIMPLE" },
+ {"-simple-prompt", "PROMPT_MODE", 0, set_symbol, "SIMPLE" },
+ {"-tracer", "USE_TRACER", 0, just_assign, (void*)Qtrue },
+ {"-single-irb", "SINGLE_IRB", 0, just_assign, (void*)Qtrue },
+ {"-back-trace-limit", "BACK_TRACE_LIMIT", 1, set_numeric, 0 },
+ {"-context-mode", "CONTEXT_MODE", 1, set_numeric, 0 },
+ {"-irb_debug", "DEBUG_LEVEL", 1, set_numeric, 0 },
+ {"-help", 0, 0, print_help, 0 },
+ {"-version", 0, 0, print_version, 0 },
+};
+
+
+/*
+ * Invokes the handler of an option that takes an argument.
+ *
+ * result: options collected so far
+ * opt:    the ARGV element the option was found in; its encoding is
+ *         reused for an argument embedded in that element
+ * entry:  the matching row of option_tbl
+ * arg:    the text following the option name inside opt; when it is
+ *         empty the argument is taken from the next element of ARGV
+ *
+ * Raises ArgumentError when ARGV has no element left to use as argument.
+ */
+static void
+process_opt_with_arg(struct command_line_options *result, VALUE opt, struct option_tbl_entry *entry, const char *arg)
+{
+    /* Qnil (not 0) when the entry has no conf key, the same as process_opt() */
+    VALUE key = entry->conf_name ? CSTR2SYM(entry->conf_name) : Qnil;
+    volatile VALUE val;
+
+    if (*arg) {
+        val = rb_enc_str_new(arg, strlen(arg), rb_enc_get(opt));
+    }
+    else {
+        val = rb_ary_shift(rb_argv);
+        if (NIL_P(val)) {
+            /* opt_name is stored without its first '-', so put it back */
+            rb_raise(rb_eArgError, "-%s needs an argument but not specified", entry->opt_name);
+        }
+    }
+    (*entry->handler)(result, key, (void*)val);
+}
+
+/*
+ * Interprets one element of ARGV.
+ *
+ * result: options collected so far; updated through the option handlers
+ * opt:    the element taken from ARGV
+ *
+ * Returns 1 when scanning should go on with the next element and 0 when
+ * option processing is over (a script name was found, or "--" was seen).
+ * Raises ArgumentError for an element that is not ASCII-compatible and
+ * fails with UnrecognizedSwitch for an unknown option.
+ */
+static int
+process_opt(struct command_line_options *result, VALUE opt)
+{
+    size_t i;
+    const char *str;
+    VALUE ex;
+
+    /* nil must be tested first: StringValue() raises TypeError for nil */
+    if (NIL_P(opt)) return 1;
+    StringValue(opt);
+    if (!rb_enc_str_asciicompat_p(opt)) rb_raise(rb_eArgError, "an element of ARGV is not ASCII-compatible");
+    str = StringValueCStr(opt);
+
+    switch (str[0]) {
+      case '\0': /* empty string */
+        return 1;
+      default: /* does not seem to be an option */
+        result->script_name = opt;
+        return 0;
+      case '-': /* it's option. let's go */
+        ++str;
+    }
+
+    /* from here on str has lost its first '-', like the names in option_tbl */
+    for (i = 0; i < sizeof(option_tbl)/sizeof(option_tbl[0]); ++i) {
+        struct option_tbl_entry *entry = &option_tbl[i];
+        size_t name_len = strlen(entry->opt_name);
+
+        if (!entry->has_arg) {
+            if (strcmp(str, entry->opt_name) != 0) continue;
+            (*entry->handler)(result,
+                              entry->conf_name ? CSTR2SYM(entry->conf_name) : Qnil,
+                              entry->data);
+            return 1;
+        }
+
+        if (strncmp(str, entry->opt_name, name_len) != 0) continue;
+        if (*entry->opt_name == '-') { /* long option: "--name=arg" or "--name arg" */
+            const char *arg = str + name_len;
+            if (*arg == '=') {
+                ++arg;
+            }
+            else if (*arg != '\0') {
+                continue; /* only a prefix of a longer name; try the next entry */
+            }
+            process_opt_with_arg(result, opt, entry, arg);
+        }
+        else { /* short option: "-Xarg" or "-X arg" */
+            process_opt_with_arg(result, opt, entry, str + 1);
+        }
+        return 1;
+    }
+
+    if (strcmp(str, "-") == 0) {
+        /* "--": the next element of ARGV is the script name */
+        result->script_name = rb_ary_shift(rb_argv);
+        return 0;
+    }
+
+    /*
+     * The Ruby implementation this replaces referred to UnrecognizedSwitch
+     * from inside module IRB, so start the constant lookup at IRB instead
+     * of at Object.
+     */
+    ex = rb_const_get(irb_cIRB, rb_intern("UnrecognizedSwitch"));
+    rb_funcall(irb_cIRB, rb_intern("fail"), 2, ex, opt);
+
+    return 0; /* not reached */
+}
+
+/*
+ * IRB.parse_opts
+ *
+ * Consumes ARGV up to and including the first non-option element, lets the
+ * option handlers update IRB's @CONF, and then applies the collected script
+ * name, encodings and load path. Returns nil.
+ *
+ * Raises TypeError when @CONF is not a Hash; the handlers may raise too.
+ */
+static VALUE
+irb_s_parse_opts(VALUE irb)
+{
+    volatile VALUE conf = rb_iv_get(irb_cIRB, "@CONF");
+    volatile struct command_line_options result = COMMAND_LINE_OPTIONS_INITIALIZER;
+    long i, len;
+
+    Check_Type(conf, T_HASH);
+    result.conf = conf;
+
+    for (;;) {
+        volatile VALUE arg = rb_ary_shift(rb_argv);
+        if (!RTEST(arg)) break;
+        if (!process_opt((struct command_line_options*)&result, arg)) break;
+    }
+
+    if (RTEST(result.script_name)) {
+        rb_gv_set("$0", result.script_name);
+        /* :SCRIPT is the key the replaced Ruby implementation stored the script under */
+        rb_hash_aset(conf, CSTR2SYM("SCRIPT"), result.script_name);
+    }
+    if (RTEST(result.default_external)) {
+        VALUE enc = rb_funcall(rb_cEncoding, rb_intern("find"), 1, result.default_external);
+        rb_enc_set_default_external(enc);
+    }
+    if (RTEST(result.default_internal)) {
+        VALUE enc = rb_funcall(rb_cEncoding, rb_intern("find"), 1, result.default_internal);
+        rb_enc_set_default_internal(enc);
+    }
+    if (RTEST(result.source_encoding)) {
+        GET_VM()->src_encoding_index = rb_to_encoding_index(result.source_encoding);
+        rb_hash_aset(conf, CSTR2SYM("SOURCE_ENCODING"), result.source_encoding);
+    }
+
+    /* the load path only needs work when -I collected at least one directory */
+    len = RARRAY_LEN(result.load_path);
+    if (len > 0) {
+        for (i = 0; i < len; ++i) {
+            VALUE path = rb_ary_entry(result.load_path, i);
+            const char *str = StringValueCStr(path);
+            /* as in the replaced Ruby code: "./..." is kept, anything else is expanded */
+            if (!(str[0] == '.' && str[1] == '/')) {
+                path = rb_funcall(rb_cFile, rb_intern("expand_path"), 1, path);
+                rb_ary_store(result.load_path, i, path);
+            }
+        }
+        rb_funcall2(rb_gv_get("$:"), rb_intern("unshift"), (int)len, RARRAY_PTR(result.load_path));
+    }
+    return Qnil;
+}
+
+
+/* command line option handlers */
+/* Option handler: stores `data`, taken as a VALUE, in the conf hash under `key`. */
+static void
+just_assign(struct command_line_options *opt, VALUE key, void *data)
+{
+    VALUE value = (VALUE)data;
+
+    rb_hash_aset(opt->conf, key, value);
+}
+
+/* Option handler: `data` is a C string; its Symbol is stored in the conf hash under `key`. */
+static void
+set_symbol(struct command_line_options *opt, VALUE key, void *data)
+{
+    const char *name = (const char*)data;
+    VALUE sym = CSTR2SYM(name);
+
+    rb_hash_aset(opt->conf, key, sym);
+}
+
+/* Option handler: converts the argument with #to_i and stores the result in the conf hash under `key`. */
+static void
+set_numeric(struct command_line_options *opt, VALUE key, void *data)
+{
+    VALUE number = rb_funcall2((VALUE)data, rb_intern("to_i"), 0, 0);
+
+    rb_hash_aset(opt->conf, key, number);
+}
+
+/*
+ * Option handler for --prompt / --prompt-mode.
+ *
+ * The argument is upcased, '-' is replaced by '_', and the result is
+ * interned, so that the conf hash receives a Symbol exactly as the replaced
+ * Ruby code (`ARGV.shift.upcase.tr("-", "_").intern`) stored it.
+ */
+static void
+set_prompt_mode(struct command_line_options *opt, VALUE key, void *data)
+{
+    VALUE mode = (VALUE)data;
+    mode = rb_funcall2(mode, rb_intern("upcase"), 0, NULL);
+    mode = rb_funcall(mode, rb_intern("tr"), 2, rb_str_new_cstr("-"), rb_str_new_cstr("_"));
+    mode = rb_funcall2(mode, rb_intern("intern"), 0, NULL);
+    rb_hash_aset(opt->conf, key, mode);
+}
+
+/*
+ * Option handler for -r: appends the library name to the Array stored in the
+ * conf hash under `key`. A nil/false argument is ignored. Raises TypeError
+ * when the conf entry is not an Array.
+ */
+static void
+set_load_module(struct command_line_options *opt, VALUE key, void *data)
+{
+    VALUE libname = (VALUE)data;
+    VALUE modules;
+
+    if (!RTEST(libname)) return;
+
+    modules = rb_hash_aref(opt->conf, key);
+    Check_Type(modules, T_ARRAY);
+    rb_ary_push(modules, libname);
+}
+
+/* Option handler for -d: assigns `data`, taken as a VALUE, to $DEBUG. */
+static void
+set_debug(struct command_line_options *unused0, VALUE unused1, void *data)
+{
+    VALUE flag = (VALUE)data;
+
+    rb_gv_set("$DEBUG", flag);
+}
+
+/*
+ * Option handler for -I: splits the argument at File::PATH_SEPARATOR and
+ * appends the pieces to the collected load path. A nil/false argument is
+ * ignored.
+ */
+static void
+set_load_path(struct command_line_options *result, VALUE unused, void *data)
+{
+    VALUE paths = (VALUE)data;
+    VALUE sep, parts;
+
+    if (!RTEST(paths)) return;
+
+    sep = rb_const_get(rb_cFile, rb_intern("PATH_SEPARATOR"));
+    parts = rb_funcall(paths, rb_intern("split"), 1, sep);
+    rb_ary_concat(result->load_path, parts);
+}
+
+/*
+ * Option handler for -v / --version: prints IRB.version followed by a
+ * newline (as the replaced Ruby code `print IRB.version, "\n"` did) and
+ * exits with status 0.
+ */
+static void
+print_version(struct command_line_options *unused0, VALUE unused1, void *data)
+{
+    VALUE version = rb_funcall2(irb_cIRB, rb_intern("version"), 0, 0);
+
+    rb_funcall(rb_stdout, rb_intern("print"), 2, version, rb_str_new_cstr("\n"));
+    rb_exit(0);
+}
+
+/* Option handler for -h / --help: loads irb/help, calls IRB.print_usage and exits with status 0. */
+static void
+print_help(struct command_line_options *unused0, VALUE unused1, void *data)
+{
+    ID usage;
+
+    rb_require("irb/help");
+    usage = rb_intern("print_usage");
+    rb_funcall2(irb_cIRB, usage, 0, 0);
+    rb_exit(0);
+}
+
+/*
+ * Option handler for -E / --encoding. The argument has the form
+ * "external", "external:internal" or ":internal".
+ *
+ * A part that is missing or empty is left alone; passing it on would make
+ * the Check_Type() in the set_*_once helpers raise TypeError for a plain
+ * "-E external".
+ */
+static void
+set_encoding(struct command_line_options *result, VALUE unused, void *data)
+{
+    VALUE external, internal;
+    VALUE encodings = rb_funcall((VALUE)data, rb_intern("split"), 2, rb_str_new_cstr(":"), INT2FIX(2));
+    encodings = rb_convert_type(encodings, T_ARRAY, "Array", "to_ary");
+
+    /* rb_ary_entry() yields nil for an index past the end of the array */
+    external = rb_ary_entry(encodings, 0);
+    internal = rb_ary_entry(encodings, 1);
+
+    if (!NIL_P(external) && RSTRING_LEN(external) > 0) {
+        set_external_once(result, external);
+    }
+    if (!NIL_P(internal) && RSTRING_LEN(internal) > 0) {
+        set_internal_once(result, internal);
+    }
+}
+
+/*
+ * Option handler for -K. The first character of the argument selects an
+ * encoding name (s: CP932, e: EUC-JP, u: UTF-8, n: ASCII-8BIT, in either
+ * case), which is then recorded as internal, external and source encoding.
+ *
+ * Raises ArgumentError when the argument is not ASCII-compatible or its
+ * first character is not one of the letters above.
+ */
+static void
+set_kcode(struct command_line_options *result, VALUE unused, void *data)
+{
+    VALUE kcode = (VALUE)data;
+    const char *enc_name = NULL;
+
+    if (!rb_enc_str_asciicompat_p(kcode)) {
+        rb_raise(rb_eArgError, "-K took a ASCII-noncompatible argument");
+    }
+
+    switch (StringValuePtr(kcode)[0]) {
+      case 's': case 'S':
+        enc_name = "CP932";
+        break;
+      case 'e': case 'E':
+        enc_name = "EUC-JP";
+        break;
+      case 'u': case 'U':
+        enc_name = "UTF-8";
+        break;
+      case 'n': case 'N':
+        enc_name = "ASCII-8BIT";
+        break;
+      default:
+        rb_raise(rb_eArgError, "unrecognized KCODE %s", StringValuePtr(kcode));
+    }
+
+    /* from here on kcode holds the encoding name, not the option argument */
+    kcode = rb_usascii_str_new_cstr(enc_name);
+    set_internal_once(result, kcode);
+    set_external_once(result, kcode);
+    set_source_encoding_once(result, kcode);
+}
+
+/* Option handler for -U: records "UTF-8" as both the internal and the external encoding name. */
+static void
+set_utf8(struct command_line_options *result, VALUE unused0, void *unused1)
+{
+    volatile VALUE name = rb_usascii_str_new_cstr("UTF-8");
+
+    set_internal_once(result, name);
+    set_external_once(result, name);
+}
+
+
+/* helper functions for encoding options */
+/*
+ * Records `name` as the requested default external encoding.
+ *
+ * The first call stores the (frozen) name. A later call is accepted only
+ * when it names the same encoding, compared case-insensitively; otherwise
+ * RuntimeError is raised. Raises TypeError when `name` is not a String.
+ */
+static void
+set_external_once(struct command_line_options *result, VALUE name)
+{
+    Check_Type(name, T_STRING);
+    /* the field starts as Qnil, which is non-zero, so it has to be tested with RTEST */
+    if (!RTEST(result->default_external)) {
+        result->default_external = rb_obj_freeze(name);
+    }
+    else if (rb_funcall(result->default_external, rb_intern("casecmp"), 1, name) != INT2FIX(0)) {
+        rb_raise(rb_eRuntimeError,
+                 "default_external already set to %s", StringValuePtr(result->default_external));
+    }
+}
+/*
+ * Records `name` as the requested default internal encoding.
+ *
+ * The first call stores the (frozen) name. A later call is accepted only
+ * when it names the same encoding, compared case-insensitively; otherwise
+ * RuntimeError is raised. Raises TypeError when `name` is not a String.
+ */
+static void
+set_internal_once(struct command_line_options *result, VALUE name)
+{
+    Check_Type(name, T_STRING);
+    /* the field starts as Qnil, which is non-zero, so it has to be tested with RTEST */
+    if (!RTEST(result->default_internal)) {
+        result->default_internal = rb_obj_freeze(name);
+    }
+    else if (rb_funcall(result->default_internal, rb_intern("casecmp"), 1, name) != INT2FIX(0)) {
+        rb_raise(rb_eRuntimeError,
+                 "default_internal already set to %s", StringValuePtr(result->default_internal));
+    }
+}
+/*
+ * Records `name` as the requested source encoding.
+ *
+ * The first call stores the (frozen) name. A later call is accepted only
+ * when it names the same encoding, compared case-insensitively; otherwise
+ * RuntimeError is raised. Raises TypeError when `name` is not a String.
+ */
+static void
+set_source_encoding_once(struct command_line_options *result, VALUE name)
+{
+    Check_Type(name, T_STRING);
+    /* the field starts as Qnil, which is non-zero, so it has to be tested with RTEST */
+    if (!RTEST(result->source_encoding)) {
+        result->source_encoding = rb_obj_freeze(name);
+    }
+    else if (rb_funcall(result->source_encoding, rb_intern("casecmp"), 1, name) != INT2FIX(0)) {
+        rb_raise(rb_eRuntimeError,
+                 "source_encoding already set to %s", StringValuePtr(result->source_encoding));
+    }
+}
+
+
+/*
+ * Extension entry point: requires "irb", remembers the IRB module in
+ * irb_cIRB and defines the singleton method IRB.parse_opts.
+ */
+void
+Init_option(void)
+{
+    ID irb_id;
+
+    rb_require("irb");
+    irb_id = rb_intern("IRB");
+    irb_cIRB = rb_const_get(rb_cObject, irb_id);
+    rb_define_singleton_method(irb_cIRB, "parse_opts", irb_s_parse_opts, 0);
+}
diff --git a/lib/irb.rb b/lib/irb.rb
index f5e662a..bc8d13c 100644
--- a/lib/irb.rb
+++ b/lib/irb.rb
@@ -11,6 +11,7 @@
require "e2mmap"
require "irb/init"
+require "irb/option"
require "irb/context"
require "irb/extend-command"
#require "irb/workspace"
diff --git a/lib/irb/init.rb b/lib/irb/init.rb
index 576b99d..ee9e8df 100644
--- a/lib/irb/init.rb
+++ b/lib/irb/init.rb
@@ -120,83 +120,6 @@ module IRB
@CONF[:LC_MESSAGES].load("irb/error.rb")
end
- FEATURE_IOPT_CHANGE_VERSION = "1.9.0"
-
- # option analyzing
- def IRB.parse_opts
- load_path = []
- while opt = ARGV.shift
- case opt
- when "-f"
- @CONF[:RC] = false
- when "-m"
- @CONF[:MATH_MODE] = true
- when "-d"
- $DEBUG = true
- when /^-r(.+)?/
- opt = $1 || ARGV.shift
- @CONF[:LOAD_MODULES].push opt if opt
- when /^-I(.+)?/
- opt = $1 || ARGV.shift
- load_path.concat(opt.split(File::PATH_SEPARATOR)) if opt
- when "--inspect"
- @CONF[:INSPECT_MODE] = true
- when "--noinspect"
- @CONF[:INSPECT_MODE] = false
- when "--readline"
- @CONF[:USE_READLINE] = true
- when "--noreadline"
- @CONF[:USE_READLINE] = false
- when "--echo"
- @CONF[:ECHO] = true
- when "--noecho"
- @CONF[:ECHO] = false
- when "--verbose"
- @CONF[:VERBOSE] = true
- when "--noverbose"
- @CONF[:VERBOSE] = false
- when "--prompt-mode", "--prompt"
- prompt_mode = ARGV.shift.upcase.tr("-", "_").intern
- @CONF[:PROMPT_MODE] = prompt_mode
- when "--noprompt"
- @CONF[:PROMPT_MODE] = :NULL
- when "--inf-ruby-mode"
- @CONF[:PROMPT_MODE] = :INF_RUBY
- when "--sample-book-mode", "--simple-prompt"
- @CONF[:PROMPT_MODE] = :SIMPLE
- when "--tracer"
- @CONF[:USE_TRACER] = true
- when "--back-trace-limit"
- @CONF[:BACK_TRACE_LIMIT] = ARGV.shift.to_i
- when "--context-mode"
- @CONF[:CONTEXT_MODE] = ARGV.shift.to_i
- when "--single-irb"
- @CONF[:SINGLE_IRB] = true
- when "--irb_debug"
- @CONF[:DEBUG_LEVEL] = ARGV.shift.to_i
- when "-v", "--version"
- print IRB.version, "\n"
- exit 0
- when "-h", "--help"
- require "irb/help"
- IRB.print_usage
- exit 0
- when /^-/
- IRB.fail UnrecognizedSwitch, opt
- else
- @CONF[:SCRIPT] = opt
- $0 = opt
- break
- end
- end
- if RUBY_VERSION >= FEATURE_IOPT_CHANGE_VERSION
- load_path.collect! do |path|
- /\A\.\// =~ path ? path : File.expand_path(path)
- end
- end
- $LOAD_PATH.unshift(*load_path)
- end
-
# running config
def IRB.run_config
if @CONF[:RC]
diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb
index 2f77245..5f8bb7e 100644
--- a/lib/irb/input-method.rb
+++ b/lib/irb/input-method.rb
@@ -44,7 +44,7 @@ module IRB
super
@line_no = 0
@line = []
- $stdin.set_encoding(IRB.conf[:LC_MESSAGES].encoding, nil)
+ $stdin.set_encoding(IRB.conf[:SOURCE_ENCODING] || IRB.conf[:LC_MESSAGES].encoding, nil)
end
def gets
@@ -102,7 +102,7 @@ module IRB
@line_no = 0
@line = []
@eof = false
- STDIN.set_encoding(IRB.conf[:LC_MESSAGES].encoding, nil)
+ STDIN.set_encoding(IRB.conf[:SOURCE_ENCODING] || IRB.conf[:LC_MESSAGES].encoding, nil)
end
def gets