require 'rubygems'
require 'nokogiri'

# Location of the html5lib test data checkout, relative to the user's home.
TESTDATA = "#{ENV['HOME']}/hg/html5lib/testdata"

# Run every tree-construction fixture through Nokogiri's HTML parser and print
# a report for each test whose serialized tree differs from the expected one.
Dir.chdir "#{TESTDATA}/tree-construction"
Dir['*.dat'].each do |file|

  # File.read opens, reads and closes the file in one call, so no handle is
  # left open per fixture (a bare open(file).read never closes it).
  # Individual tests are separated by empty lines.
  File.read(file).split(/^$/).each do |test|
    # Skip a chunk that consists of nothing but an empty "#data" header.
    next if test == "\n#data\n"

    # Split the chunk on its "#section" header lines and pair each section
    # name with the text that follows it; [1..-1] drops whatever precedes the
    # first header.
    test = Hash[*test.split(/^#(\w+)\n/)[1..-1]]

    # Serialized tree produced by the parser, built up by the walker below.
    actual = ''
    # Current line prefix; grows by two spaces per nesting level.
    prefix = '| '

    # Recursively appends a line-per-node dump of +node+ to +actual+.
    # NOTE(review): text and attribute values are rendered with String#inspect,
    # which escapes characters such as newlines and embedded quotes -- confirm
    # this matches the quoting used by the expected tree dumps.
    walker = lambda do |node|
      if node.element?
        attrs = Hash[node.attributes.map {|n,v| [n,v.to_s]}]
        actual << "#{prefix}<#{node.name}>\n"
        attrs.each do |n,v|
          actual << "#{prefix}  #{n}=#{v.inspect}\n"
        end
        # Indent the children one level deeper, then restore the prefix.
        save, prefix = prefix, prefix + '  '
        node.children.each(&walker)
        prefix = save
      elsif node.text?
        actual << "#{prefix}#{node.text.inspect}\n"
      elsif node.comment?
        actual << "#{prefix}<!-- #{node.text} -->\n"
        # actual << ['Comment', node.text]
      elsif node.is_a? Nokogiri::XML::DTD
        # Deliberately emits nothing: the doctype line is copied from the
        # expected output further down instead of being serialized here.
        # actual << ['DOCTYPE', node.name, nil, nil, true]
      else
        actual << "#{prefix}#{node.to_s}\n"
      end
    end

    doc = Nokogiri::HTML(test['data'].to_s.strip)

    doc.children.each(&walker)

    expected =  test['document']

    # Borrow the leading doctype line verbatim from the expected output, so the
    # doctype itself is effectively not part of the comparison.
    actual = $1 + actual if expected =~ /^(\| <!DOCTYPE.*?>\n)/
    # An empty dump is represented as a lone prefix line.
    actual = "|\n" if actual.empty?

    # When <html> is immediately followed by <body>, insert an empty <head>
    # line between them (assigning to the empty capture group inserts text at
    # that position).
    if actual =~ /\| <html>\n\|   <body>/
       actual[/\| <html>\n()\|   <body>/,1] = "|   <head>\n"
    end

    # Report mismatches only; matching tests produce no output.
    if actual != expected
      puts
      puts "#input"
      puts test['data']
      puts "#actual"
      puts actual
      puts "#expected"
      puts expected
    end
  end
end

