# Runs the html5lib tree-construction test files through Nokogiri's HTML
# parser and prints every case whose serialized tree differs from the
# "#document" section of the test.
#
# Each *.dat file is split into test cases on empty lines; within a case,
# lines of the form "#name" introduce a section whose text becomes the value
# stored under that name ("data" is the input, "document" the expected tree).
#
# NOTE(review): several string and regexp literals below (the comment branch
# of the serializer and the patterns applied to +expected+ / +actual+) look
# like they may have lost markup or whitespace somewhere upstream. They are
# kept exactly as found -- confirm against the original script.
require 'rubygems'
require 'nokogiri'

# Root of the local html5lib test-data checkout.
TESTDATA = "#{ENV['HOME']}/hg/html5lib/testdata"
Dir.chdir "#{TESTDATA}/tree-construction"

# Appends a line-per-node dump of +node+ (and, for elements, its attributes
# and descendants) to +out+. +prefix+ is the text emitted at the start of
# every line for this node; children are dumped with a longer prefix.
#
# Built once here rather than once per test case; the prefix is passed down
# explicitly instead of being saved and restored around the recursion.
serialize = lambda do |node, prefix, out|
  if node.element?
    out << "#{prefix}<#{node.name}>\n"
    node.attributes.each do |name, attr|
      out << "#{prefix} #{name}=#{attr.to_s.inspect}\n"
    end
    child_prefix = prefix + ' '
    node.children.each { |child| serialize.call(child, child_prefix, out) }
  elsif node.text?
    out << "#{prefix}#{node.text.inspect}\n"
  elsif node.comment?
    # Only the prefix is emitted for comments; the comment text is not dumped.
    out << "#{prefix}\n"
  elsif node.is_a? Nokogiri::XML::DTD
    # DTD nodes are deliberately not serialized (see the header patch below).
  else
    out << "#{prefix}#{node.to_s}\n"
  end
end

Dir['*.dat'].each do |file|
  # File.read closes the file when done; the previous `open(file).read` left
  # one handle open per file and went through Kernel#open, which treats a
  # path starting with "|" as a command to run.
  File.read(file).split(/^$/).each do |chunk|
    next if chunk == "\n#data\n"

    # The capture group in the split pattern keeps the section names, so the
    # result alternates name, body, name, body, ...; element 0 is whatever
    # precedes the first "#name" line and is dropped.
    test = Hash[*chunk.split(/^#(\w+)\n/)[1..-1]]

    actual = ''
    doc = Nokogiri::HTML(test['data'].to_s.strip)
    doc.children.each { |child| serialize.call(child, '| ', actual) }

    expected = test['document']

    # The serializer skips DTD nodes, so when the expected tree opens with
    # this header line, copy it over verbatim before comparing.
    if expected =~ /^(\| \n)/
      actual = Regexp.last_match(1) + actual
    end

    actual = "|\n" if actual.empty?

    # Insert one more "| \n" line at the (empty) capture position of the
    # first match.
    if actual =~ /\| \n\| /
      actual[/\| \n()\| /, 1] = "| \n"
    end

    next if actual == expected

    puts
    puts "#input"
    puts test['data']
    puts "#actual"
    puts actual
    puts "#expected"
    puts expected
  end
end