[ruby-list:50219] YAML.dump fails to write a file with UTF-8 string

From: Yuji Yamano <yyamano@...>
Date: 2015-08-26 15:23:34 UTC
List: ruby-list #50219
こんばんは。

UTF-8 の文字列を含む Hash を YAML.dump でファイルに書き込もうとすると、
Encoding::UndefinedConversionError が発生します。これはバグでしょうか?
それとも仕様上の制約でしょうか?

Ruby 2.0.0 と 2.2.3 で試しましたが、どちらも同じ結果でした。

% cat foo.rb
# -*- coding: utf-8 -*-
require 'yaml'

Encoding.default_internal = 'utf-8'

config = {'name' => 'あ'}
# config = {'name' => 'A'}

puts "encoding=#{config['name'].encoding}"
puts "default_internal=#{Encoding.default_internal}"
puts "default_external=#{Encoding.default_external}"

out = File.open("foo.yml", "w:utf-8")
begin
  YAML.dump(config, out)
ensure
  out.close
end

% ruby -v
ruby 2.0.0p598 (2014-11-13) [x86_64-linux]
% ruby ./foo.rb 
encoding=UTF-8
default_internal=UTF-8
default_external=US-ASCII
/usr/share/ruby/psych/visitors/emitter.rb:27:in `write': "\xE3" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)
	from /usr/share/ruby/psych/visitors/emitter.rb:27:in `end_document'
	from /usr/share/ruby/psych/visitors/emitter.rb:27:in `visit_Psych_Nodes_Document'
	from /usr/share/ruby/psych/visitors/visitor.rb:15:in `visit'
	from /usr/share/ruby/psych/visitors/visitor.rb:5:in `accept'
	from /usr/share/ruby/psych/visitors/emitter.rb:20:in `block in visit_Psych_Nodes_Stream'
	from /usr/share/ruby/psych/visitors/emitter.rb:20:in `each'
	from /usr/share/ruby/psych/visitors/emitter.rb:20:in `visit_Psych_Nodes_Stream'
	from /usr/share/ruby/psych/visitors/visitor.rb:15:in `visit'
	from /usr/share/ruby/psych/visitors/visitor.rb:5:in `accept'
	from /usr/share/ruby/psych/nodes/node.rb:46:in `yaml'
	from /usr/share/ruby/psych.rb:245:in `dump'
	from ./foo.rb:15:in `<main>'
% /usr/local/ruby/223/bin/ruby -v
ruby 2.2.3p173 (2015-08-18 revision 51636) [x86_64-linux]
% /usr/local/ruby/223/bin/ruby ./foo.rb
encoding=UTF-8
default_internal=UTF-8
default_external=US-ASCII
/usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/emitter.rb:27:in `write': "\xE3" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/emitter.rb:27:in `end_document'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/emitter.rb:27:in `visit_Psych_Nodes_Document'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/visitor.rb:15:in `visit'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/visitor.rb:5:in `accept'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/emitter.rb:20:in `block in visit_Psych_Nodes_Stream'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/emitter.rb:20:in `each'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/emitter.rb:20:in `visit_Psych_Nodes_Stream'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/visitor.rb:15:in `visit'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/visitors/visitor.rb:5:in `accept'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych/nodes/node.rb:48:in `yaml'
	from /usr/local/ruby/223/lib/ruby/2.2.0/psych.rb:410:in `dump'
	from ./foo.rb:15:in `<main>'


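以下のパッチをあてると上のコードは動くようになります。writer が IO に渡す文字列を、
エンコーディング指定なしの rb_str_new (ASCII-8BIT) ではなく rb_enc_str_new で UTF-8 として
生成するように変更しています。ただし、Ruby の内部構造をよく知らないので、
これが正しい修正なのかどうかはよくわかりません。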

--- ext/psych/psych_emitter.c.orig	2015-08-26 14:53:29.697669630 +0000
+++ ext/psych/psych_emitter.c	2015-08-26 14:57:08.780172213 +0000
@@ -15,7 +15,11 @@
 static int writer(void *ctx, unsigned char *buffer, size_t size)
 {
     VALUE io = (VALUE)ctx;
+#ifdef HAVE_RUBY_ENCODING_H
+    VALUE str = rb_enc_str_new((const char *)buffer, (long)size, rb_utf8_encoding());
+#else
     VALUE str = rb_str_new((const char *)buffer, (long)size);
+#endif
     VALUE wrote = rb_funcall(io, id_write, 1, str);
     return (int)NUM2INT(wrote);
 }


-- Yuji Yamano

In This Thread

Prev Next