# Implement a REXML Pull Parser interface on top of the Expat parser 
module REXML
  module Parsers
    # Kernel#callcc is built in on Ruby 1.8 and provided by the
    # 'continuation' library on later versions.
    begin
      require 'continuation'
    rescue LoadError
      # No such library on this interpreter; callcc is assumed to be built in.
    end

    # REXML pull-parser interface on top of the event-driven Expat parser.
    #
    # Two continuations are used to switch between the code consuming events
    # and the block that receives Expat's events:
    #
    # * @sax_context  - where to resume on the parser side
    # * @pull_context - where to resume in the caller of +pull+
    #
    # +pull+ saves the caller's position and jumps to the parser side; the
    # parser side runs until the next event and +push+ jumps back, making
    # +pull+ return that event as an Array such as
    # [:start_element, name, attributes].  Once the document is finished every
    # further +pull+ returns [:end_document].
    #
    # NOTE: the parser side runs on the call stack captured inside
    # +initialize+, so the REXML::ParseException raised for an Expat error
    # unwinds through the code that called +new+.
    class ExpatParser
      # xml:: the document, handed unchanged to XML::Parser#parse.
      #
      # Returns as soon as the entry continuation has been captured; nothing
      # is loaded or parsed until the first call to +pull+.
      #
      # Raises REXML::ParseException (during a later +pull+) when Expat
      # reports an XMLParserError.
      def initialize(xml)
        callcc do |cc|
          @sax_context = cc
          return
        end

        # Everything below runs only after +pull+ has invoked @sax_context.

        # Loaded lazily, and outside the rescue below so that a missing
        # library surfaces as a plain LoadError.
        require 'xml/parser'

        begin
          @parser = XML::Parser.new
          class << @parser
            # enable additional events
            attr_accessor :comment, :startCdata, :endCdata, :xmlDecl
          end

          in_cdata = false
          @parser.parse(xml) do |type, name, data|
            case type
            when XML::Parser::START_ELEM
              # name = element name  ; data = hash of attributes
              push :start_element, name, data

            when XML::Parser::END_ELEM
              # name = element name  ; data = nil
              push :end_element, name

            when XML::Parser::CDATA
              # name = nil           ; data = string
              # Character data; reported as :cdata only between the
              # START_CDATA and END_CDATA events.
              if in_cdata
                push :cdata, data
              else
                push :text, data
              end

            when XML::Parser::PI
              # name = PI target     ; data = string
              # A leading space is prepended to the content, presumably to
              # line up with what BaseParser reports.
              push :processing_instruction, name, ' ' + data

            when XML::Parser::NOTATION_DECL
              # name = notation name ; data = [URL base, system ID, public ID]

            when XML::Parser::EXTERNAL_ENTITY_REF
              # name = entity names  ; data = [URL base, system ID, public ID]

            when XML::Parser::COMMENT
              # name = nil           ; data = string
              push :comment, data

            when XML::Parser::START_CDATA
              # name = nil           ; data = nil
              in_cdata = true

            when XML::Parser::END_CDATA
              # name = nil           ; data = nil
              in_cdata = false

            when XML::Parser::START_NAMESPACE_DECL
              # name = prefix        ; data = URI

            when XML::Parser::END_NAMESPACE_DECL
              # name = prefix        ; data = nil

            when XML::Parser::START_DOCTYPE_DECL
              # name = doctype name  ; data = nil

            when XML::Parser::END_DOCTYPE_DECL
              # name = nil           ; data = nil

            when XML::Parser::DEFAULT # defaultExpand enables this event
              # name = nil           ; data = string

            when XML::Parser::ELEMENT_DECL
              # name = element name  ; data = [type, quant, name, [...]]

            when XML::Parser::ATTLIST_DECL
              # name = element name  ; data = [attname,atttype,default,isrequired]

            when XML::Parser::XML_DECL
              # name = nil           ; data = [version, encoding, standalone]
              # Expat's standalone flag is 0 for "no", 1 for "yes" and -1
              # when the declaration has no standalone attribute at all.
              data[-1] = case data[-1]
                         when 0  then "no"
                         when -1 then nil
                         else "yes"
                         end
              push :xmldecl, *data

            when XML::Parser::ENTITY_DECL
              # name = entity name   ; data = [type, quant, name, [...]]

            end
          end

          @parser.done
          # The document is exhausted: answer every further pull the same way.
          push :end_document while true
        rescue XMLParserError => e
          @parser.done
          raise REXML::ParseException, e.message
        end
      end

      # Hands one event to the consumer.  The parser-side position is saved in
      # @sax_context, then control jumps to the pending +pull+, which returns
      # +value+ (the arguments collected into an Array).
      def push(*value)
        callcc do |cc|
          @sax_context = cc
          @pull_context.call value
        end
      end

      # Returns the next event.  The caller's position is saved in
      # @pull_context, then the parser side is resumed until it calls +push+.
      def pull
        callcc do |cc|
          @pull_context = cc
          @sax_context.call
        end
      end
    end
  end
end

# Create the registry of parsers, unless something has already created it.
# respond_to? is used for the check because it gives the same answer whether
# the interpreter reports method names as Strings or as Symbols.
unless REXML::Parsers.respond_to?(:registry)
  module REXML
    module Parsers
      require 'rexml/parsers/baseparser'

      # Maps a parser key (a Symbol) to the class implementing it.
      @@registry = {:base=>BaseParser}

      # Returns the registry Hash itself; callers add entries by assigning
      # to it.
      def self.registry
        @@registry
      end

      class TreeParser
        # source::        passed to the selected parser's constructor.
        # build_context:: stored as @build_context (a new Document by default).
        # parser::        registry key of the parser to use; a key that is not
        #                 registered (such as the default) selects BaseParser.
        def initialize( source, build_context = Document.new, parser=:default )
          @build_context = build_context
          parser_class = Parsers.registry[parser] || BaseParser
          @parser = parser_class.new source
        end
      end
    end
  end
end

# Add Expat to the registry: store the ExpatParser class under the :expat key.
REXML::Parsers.registry[:expat] = REXML::Parsers::ExpatParser

# unit tests
if __FILE__ == $0
  require 'test/unit'

  # Runs only when this file is executed directly.  Most tests feed the same
  # document to BaseParser and ExpatParser and require identical events.
  class REXML::Parsers::ExpatParser::Tests < Test::Unit::TestCase
    # Pulls from a BaseParser and an ExpatParser in lockstep and asserts that
    # every event matches, stopping once BaseParser reports [:end_document].
    def match_base(doc)
      require "rexml/parsers/baseparser"
      reference = REXML::Parsers::BaseParser.new doc
      candidate = REXML::Parsers::ExpatParser.new doc
      loop do
        event = reference.pull
        assert_equal event, candidate.pull
        break if event == [:end_document]
      end
    end

    # Pulls from an ExpatParser until +expected+ or [:end_document] shows up,
    # then asserts that the last event pulled is +expected+.
    def contains_event(doc, expected)
      parser = REXML::Parsers::ExpatParser.new doc
      event = nil
      loop do
        event = parser.pull
        break if event == expected || event == [:end_document]
      end
      assert_equal expected, event
    end

    def test_elements
      match_base "<foo><bar/></foo>"
    end

    def test_attr
      match_base "<foo a=\"b\" c='d'/>"
    end

    def test_xmlns
      match_base "<foo xmlns:a='http://example.org/'><a:bar/></foo>"
      match_base "<foo xmlns:a='http://example.org/'><bar a:b='x'/></foo>"
    end

    def test_text
      match_base "<foo>2</foo>"
    end

    def test_cdata
      match_base "<foo><![CDATA[text]]></foo>"
    end

    def test_pi
      match_base "<?foo bar='baz'?><bar/>"
    end

    def test_comment
      match_base "<foo><!-- comment --></foo>"
    end

    def test_xml_decl
      match_base '<?xml version="1.0" encoding="UTF-8" standalone="no"?><foo/>'
    end

    # Passes when the truncated document raises REXML::ParseException.  If
    # nothing is raised, contains_event fails instead, because the event []
    # never appears before [:end_document].
    def test_parser_error
      contains_event '<foo', []
    rescue REXML::ParseException
      assert true
    end

    # Predefined and numeric character references arrive already expanded.
    def test_entity
      contains_event '<foo>&lt;</foo>', [:text, '<']
      contains_event '<foo>&#32;</foo>', [:text, ' ']
      contains_event '<foo>&#x41;</foo>', [:text, 'A']
    end

    # The document may also be supplied as an IO-like object.
    def test_file
      require "stringio"
      file = StringIO.new
      file.write "<foo/>"
      file.rewind
      contains_event file, [:end_element, "foo"]
    end

    # NOT SUPPORTED BELOW THIS LINE

    # Not picked up by the test runner: the name lacks the test_ prefix.
    def TBD_doctype
      match_base '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtl"/>'
    end
  end
end
