# Implement a REXML Pull Parser interface on top of the Expat parser

# Kernel#callcc is built in on Ruby 1.8 but is provided by the 'continuation'
# library on later versions; load it only when callcc is not already there.
require 'continuation' unless Kernel.respond_to?(:callcc, true)

module REXML
  module Parsers
    # Adapts Expat's push-style (callback) parsing to a pull interface.
    #
    # Two continuations are used as a hand-rolled coroutine pair:
    # * @sax_context  - where the Expat-driving code is currently suspended
    # * @pull_context - where the most recent caller of #pull is waiting
    #
    # Each call to #pull resumes the parser side until it produces the next
    # event via #push, which jumps back to the waiting #pull caller.
    class ExpatParser
      # Records the parser-side continuation and returns immediately; the
      # remainder of this method only runs once #pull resumes @sax_context.
      #
      # xml:: the document handed to XML::Parser#parse (the tests in this
      #       file pass both Strings and a StringIO).
      #
      # Raises REXML::ParseException (from the resumed context) when Expat
      # reports an XMLParserError.
      def initialize(xml)
        callcc do |context|
          @sax_context = context
          return
        end

        # Everything below executes on the first #pull, not during .new.
        require 'xml/parser'
        @parser = XML::Parser.new
        class << @parser
          # enable additional events
          attr_accessor :comment, :startCdata, :endCdata, :xmlDecl
        end

        in_cdata = false
        @parser.parse(xml) do |type, name, data|
          case type
          when XML::Parser::START_ELEM
            # name = element name ; data = hash of attributes
            push :start_element, name, data
          when XML::Parser::END_ELEM
            # name = element name ; data = nil
            push :end_element, name
          when XML::Parser::CDATA
            # name = nil ; data = string
            # Character data is reported as :cdata only between the
            # START_CDATA / END_CDATA events, otherwise as plain :text.
            if in_cdata
              push :cdata, data
            else
              push :text, data
            end
          when XML::Parser::PI
            # name = processing instruction name ; data = string
            # (data is concatenated onto ' ' below, so it must be a String)
            push :processing_instruction, name, ' ' + data
          when XML::Parser::NOTATION_DECL
            # name = notation name ; data = [URL base, system ID, public ID]
          when XML::Parser::EXTERNAL_ENTITY_REF
            # name = entity names ; data = [URL base, system ID, public ID]
          when XML::Parser::COMMENT
            # name = nil ; data = string
            push :comment, data
          when XML::Parser::START_CDATA
            # name = nil ; data = nil
            in_cdata = true
          when XML::Parser::END_CDATA
            # name = nil ; data = nil
            in_cdata = false
          when XML::Parser::START_NAMESPACE_DECL
            # name = prefix ; data = URI
          when XML::Parser::END_NAMESPACE_DECL
            # name = prefix ; data = nil
          when XML::Parser::START_DOCTYPE_DECL
            # name = doctype name ; data = nil
          when XML::Parser::END_DOCTYPE_DECL
            # name = nil ; data = nil
          when XML::Parser::DEFAULT
            # defaultExpand enables this event
            # name = nil ; data = string
          when XML::Parser::ELEMENT_DECL
            # name = element name ; data = [type, quant, name, [...]]
          when XML::Parser::ATTLIST_DECL
            # name = element name ; data = [attname,atttype,default,isrequired]
          when XML::Parser::XML_DECL
            # name = nil ; data = [version, encoding, standalone]
            # Translate the numeric standalone flag: 0 => "no", 1 => "yes".
            # Any other value becomes nil rather than "yes".
            # NOTE(review): Expat documents -1 for "no standalone attribute";
            # this assumes the Ruby binding passes that integer through.
            data[-1] = case data[-1]
                       when 0 then "no"
                       when 1 then "yes"
                       end
            push :xmldecl, *data
          when XML::Parser::ENTITY_DECL
            # name = entity name ; data = [type, quant, name, [...]]
          end
        end

        @parser.done
        # Once the document is exhausted, answer every further #pull with
        # :end_document; each push suspends until the next pull.
        push :end_document while true
      rescue XMLParserError => exception
        @parser.done
        raise REXML::ParseException.new(exception.message)
      end

      # Hands one event to the waiting #pull caller.
      #
      # value:: the event, collected into an Array (e.g. [:text, "abc"]).
      #
      # Saves the current position as @sax_context so the next #pull resumes
      # right after this call, then jumps to @pull_context with the event.
      def push(*value)
        callcc do |context|
          @sax_context = context
          @pull_context.call value
        end
      end

      # Returns the next event Array produced by #push.
      #
      # Saves the caller's position as @pull_context and resumes the parser
      # side; the value passed to @pull_context.call becomes the return value.
      def pull
        callcc do |context|
          @pull_context = context
          @sax_context.call
        end
      end
    end
  end
end

# create a registry of parsers, if none exist
# (respond_to? works whether public_methods lists Strings or Symbols)
unless REXML::Parsers.respond_to?(:registry)
  module REXML
    module Parsers
      require 'rexml/parsers/baseparser'

      @@registry = {:base=>BaseParser}

      # Returns the Hash mapping parser names (Symbols) to parser classes.
      def Parsers::registry
        @@registry
      end

      class TreeParser
        # source::        passed unchanged to the selected parser's .new
        # build_context:: stored in @build_context
        # parser::        registry key; unknown keys fall back to BaseParser
        def initialize( source, build_context = Document.new, parser=:default )
          @build_context = build_context
          @parser = (Parsers.registry[parser] or BaseParser).new source
        end
      end
    end
  end
end

# Add Expat to the registry
REXML::Parsers.registry[:expat] = REXML::Parsers::ExpatParser

# unit tests
if __FILE__ == $0
  require 'test/unit'

  # NOTE(review): several fixture documents below are empty strings; this
  # looks like markup that was lost from the file - confirm against the
  # upstream source before relying on these tests.
  class REXML::Parsers::ExpatParser::Tests < Test::Unit::TestCase
    # Asserts that ExpatParser yields the same event stream as BaseParser
    # for +doc+, up to and including [:end_document].
    def match_base(doc)
      require "rexml/parsers/baseparser"
      parser1 = REXML::Parsers::BaseParser.new doc
      parser2 = REXML::Parsers::ExpatParser.new doc
      loop do
        event = parser1.pull
        assert_equal event, parser2.pull
        break if event == [:end_document]
      end
    end

    # Asserts that +expected+ occurs somewhere in ExpatParser's event stream
    # for +doc+.
    def contains_event(doc, expected)
      parser = REXML::Parsers::ExpatParser.new doc
      event = nil
      loop do
        event = parser.pull
        break if event == expected || event == [:end_document]
      end
      assert_equal expected, event
    end

    def test_elements
      match_base ""
    end

    def test_attr
      match_base ""
    end

    def test_xmlns
      match_base ""
      match_base ""
    end

    def test_text
      match_base "2"
    end

    def test_cdata
      match_base ""
    end

    def test_pi
      match_base ""
    end

    def test_comment
      match_base ""
    end

    # Given its own name so it no longer replaces test_comment above.
    def test_comment_2
      match_base ""
    end

    def test_xml_decl
      match_base ''
    end

    def test_parser_error
      contains_event '<', [:text, '<']
      contains_event ' ', [:text, ' ']
      contains_event 'A', [:text, 'A']
    end

    def test_file
      require "stringio"
      file = StringIO.new
      file.write ""
      file.rewind
      contains_event file, [:end_element, "foo"]
    end

    # NOT SUPPORTED BELOW THIS LINE

    def TBD_doctype
      match_base ''
    end
  end
end