[ruby-dev:29643] [patch] REXMLでDTDを読み込む
From:
堀川久 <vzw00011@...>
Date:
2006-10-09 06:31:38 UTC
List:
ruby-dev #29643
こんにちは。

Ruby 1.8.5添付のREXMLで、DTDを読み込めるようにしました。まだvalidationはできません。RELAX NGと共通部分をまとめたいと思うのですが、RELAX NGのほうも問題含みです。

方向性の確認のため、パッチを投げます。取り込んでいただけないでしょうか。

・rexml/dtd/dtd.rb があるが、メンテナンスされておらず、Ruby 1.8ではエラーになる。
・rexml/dtd/*とrexml/*に重複が多い。おそらくdtd/*のほうが古いコードと思うので、そちらを削除。
・スーパークラスElementがサブクラスDocumentに依存している部分がある。
・Parsers::TreeParserがDocumentに依存している部分がある。
・その他、内部で使用しているのにrequireしていない部分がある。

よろしくお願いします。

--
HORIKAWA Hisashi (in Kanji: 堀川 久)
Netsphere Laboratories
http://www.nslabs.jp/
Attachments (1)
rexml-diff.diff
(9.63 KB, text/x-diff)
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/child.rb /lib/ruby/1.8/rexml/child.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/child.rb 2003-06-10 10:31:04.000000000 +0900
+++ /lib/ruby/1.8/rexml/child.rb 2006-10-06 09:27:04.963844500 +0900
@@ -15,6 +15,7 @@
# if supplied, the parent of this child will be set to the
# supplied value, and self will be added to the parent
def initialize( parent = nil )
+ raise TypeError, "expected a Parent but #{parent.inspect}" if parent && !defined?(parent.add)
@parent = nil
# Declare @parent, but don't define it. The next line sets the
# parent.
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/doctype.rb /lib/ruby/1.8/rexml/doctype.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/doctype.rb 2006-04-14 11:56:42.000000000 +0900
+++ /lib/ruby/1.8/rexml/doctype.rb 2006-10-06 19:05:32.704872500 +0900
@@ -4,6 +4,8 @@
require 'rexml/entity'
require 'rexml/attlistdecl'
require 'rexml/xmltokens'
+require 'rexml/dtd/dtd'
+require 'rexml/element'
module REXML
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
@@ -61,6 +63,7 @@
event = parser.pull
if event[0] == :start_doctype
@name, @external_id, @long_name, @uri, = event[1..-1]
+ DTD::Parser.parse_dtd parser, self
end
else
super()
@@ -227,9 +230,14 @@
public
class ElementDecl < Declaration
+ PATTERN_RE = /^\s*<!ELEMENT\s+([A-Za-z_:][A-Za-z0-9_:.-]*)\s+(.+)/um
def initialize( src )
super
+ md = PATTERN_RE.match src
+ @name = md[1]
+ @rest = md[2]
end
+ attr_reader :name, :rest
end
class ExternalEntity < Child
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/attlistdecl.rb /lib/ruby/1.8/rexml/dtd/attlistdecl.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/attlistdecl.rb 2003-06-10 10:31:07.000000000 +0900
+++ /lib/ruby/1.8/rexml/dtd/attlistdecl.rb 1970-01-01 09:00:00.000000000 +0900
@@ -1,10 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class AttlistDecl < Child
- START = "<!ATTLIST"
- START_RE = /^\s*#{START}/um
- PATTERN_RE = /\s*(#{START}.*?>)/um
- end
- end
-end
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/dtd.rb /lib/ruby/1.8/rexml/dtd/dtd.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/dtd.rb 2003-06-10 10:31:07.000000000 +0900
+++ /lib/ruby/1.8/rexml/dtd/dtd.rb 2006-10-09 15:07:49.369206500 +0900
@@ -1,9 +1,9 @@
-require "rexml/dtd/elementdecl"
-require "rexml/dtd/entitydecl"
+require 'rexml/doctype'
+require "rexml/entity"
require "rexml/comment"
-require "rexml/dtd/notationdecl"
-require "rexml/dtd/attlistdecl"
+require "rexml/attlistdecl"
require "rexml/parent"
+require 'rexml/parsers/baseparser'
module REXML
module DTD
@@ -17,34 +17,34 @@
end
end
- # Takes a String and parses it out
- def Parser.parse_helper( input )
- contents = Parent.new
- while input.size > 0
- case input
- when ElementDecl.PATTERN_RE
- match = $&
- source = $'
- contents << EleemntDecl.new( match )
- when AttlistDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << AttlistDecl.new( matchdata )
- when EntityDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << EntityDecl.new( matchdata )
- when Comment.PATTERN_RE
- matchdata = $~
- source = $'
- contents << Comment.new( matchdata )
- when NotationDecl.PATTERN_RE
- matchdata = $~
- source = $'
- contents << NotationDecl.new( matchdata )
+ def Parser.parse_dtd parser, parent
+ while parser.has_next?
+ event = parser.pull
+ break if event[0] == :end_doctype
+
+ case event[0]
+ when :elementdecl
+ parent.add ElementDecl.new(event[1])
+ when :attlistdecl
+ parent.add AttlistDecl.new(event[1..-1])
+ when :entitydecl
+ parent.add Entity.new(event[1..-1])
+ when :notationdecl
+ parent.add NotationDecl.new(*event[1..-1])
+ when :externalentity
+ # TODO: fixme
+ when :processing_instruction, :comment, :text
+ # empty
+ else
+ raise ParseException.new("unexpected: #{event}")
end
end
- contents
+ parent
+ end
+
+ def Parser.parse_helper( input )
+ parser = Parsers::BaseParser.new(input, :document_status => :in_doctype)
+ return parse_dtd(parser, Parent.new)
end
end
end
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/elementdecl.rb /lib/ruby/1.8/rexml/dtd/elementdecl.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/elementdecl.rb 2003-06-10 10:31:07.000000000 +0900
+++ /lib/ruby/1.8/rexml/dtd/elementdecl.rb 1970-01-01 09:00:00.000000000 +0900
@@ -1,17 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class ElementDecl < Child
- START = "<!ELEMENT"
- START_RE = /^\s*#{START}/um
- PATTERN_RE = /^\s*(#{START}.*?)>/um
- PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/
- #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
-
- def initialize match
- @name = match[1]
- @rest = match[2]
- end
- end
- end
-end
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/entitydecl.rb /lib/ruby/1.8/rexml/dtd/entitydecl.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/entitydecl.rb 2004-04-02 12:26:19.000000000 +0900
+++ /lib/ruby/1.8/rexml/dtd/entitydecl.rb 2006-10-06 18:52:06.496170200 +0900
@@ -1,3 +1,4 @@
+=begin
require "rexml/child"
module REXML
module DTD
@@ -54,3 +55,4 @@
end
end
end
+=end
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/notationdecl.rb /lib/ruby/1.8/rexml/dtd/notationdecl.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/dtd/notationdecl.rb 2004-04-02 12:26:19.000000000 +0900
+++ /lib/ruby/1.8/rexml/dtd/notationdecl.rb 1970-01-01 09:00:00.000000000 +0900
@@ -1,39 +0,0 @@
-require "rexml/child"
-module REXML
- module DTD
- class NotationDecl < Child
- START = "<!NOTATION"
- START_RE = /^\s*#{START}/um
- PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
- def initialize src
- super()
- if src.match( PUBLIC )
- md = src.match( PUBLIC, true )
- elsif src.match( SYSTEM )
- md = src.match( SYSTEM, true )
- else
- raise ParseException.new( "error parsing notation: no matching pattern", src )
- end
- @name = md[1]
- @middle = md[2]
- @rest = md[3]
- end
-
- def to_s
- "<!NOTATION #@name #@middle #@rest>"
- end
-
- def write( output, indent )
- indent( output, indent )
- output << to_s
- end
-
- def NotationDecl.parse_source source, listener
- md = source.match( PATTERN_RE, true )
- thing = md[0].squeeze(" \t\n\r")
- listener.send inspect.downcase, thing
- end
- end
- end
-end
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/element.rb /lib/ruby/1.8/rexml/element.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/element.rb 2006-09-08 10:53:31.000000000 +0900
+++ /lib/ruby/1.8/rexml/element.rb 2006-10-06 19:00:22.623659300 +0900
@@ -121,9 +121,7 @@
end
def root
- return elements[1] if self.kind_of? Document
- return self if parent.kind_of? Document or parent.nil?
- return parent.root
+ parent.nil? ? self : parent.root
end
# Evaluates to the document to which this element belongs, or nil if this
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/parsers/baseparser.rb /lib/ruby/1.8/rexml/parsers/baseparser.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/parsers/baseparser.rb 2006-09-08 10:53:33.000000000 +0900
+++ /lib/ruby/1.8/rexml/parsers/baseparser.rb 2006-10-06 18:58:00.054686300 +0900
@@ -103,7 +103,8 @@
######################################################################
MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
- def initialize( source )
+ def initialize source, option = {}
+ @option = option
self.stream = source
end
@@ -126,10 +127,10 @@
attr_reader :source
- def stream=( source )
+ def stream= source
@source = SourceFactory.create_from( source )
@closed = nil
- @document_status = nil
+ @document_status = @option[:document_status]
@tags = []
@stack = []
@entities = []
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/parsers/streamparser.rb /lib/ruby/1.8/rexml/parsers/streamparser.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/parsers/streamparser.rb 2006-04-14 11:56:44.000000000 +0900
+++ /lib/ruby/1.8/rexml/parsers/streamparser.rb 2006-10-06 18:54:11.942928700 +0900
@@ -1,3 +1,5 @@
+require 'rexml/parsers/baseparser'
+
module REXML
module Parsers
class StreamParser
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/parsers/treeparser.rb /lib/ruby/1.8/rexml/parsers/treeparser.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/parsers/treeparser.rb 2006-04-14 11:56:44.000000000 +0900
+++ /lib/ruby/1.8/rexml/parsers/treeparser.rb 2006-10-06 18:54:28.689800100 +0900
@@ -3,7 +3,7 @@
module REXML
module Parsers
class TreeParser
- def initialize( source, build_context = Document.new )
+ def initialize source, build_context
@build_context = build_context
@parser = Parsers::BaseParser.new( source )
end
diff -Naurw /opt/src/ruby-1.8.5-20061002/lib/rexml/text.rb /lib/ruby/1.8/rexml/text.rb
--- /opt/src/ruby-1.8.5-20061002/lib/rexml/text.rb 2006-04-14 11:56:43.000000000 +0900
+++ /lib/ruby/1.8/rexml/text.rb 2006-10-09 15:12:42.191645500 +0900
@@ -1,7 +1,5 @@
require 'rexml/entity'
-require 'rexml/doctype'
require 'rexml/child'
-require 'rexml/doctype'
require 'rexml/parseexception'
module REXML