require "rexml/document"
require 'test/unit'
require 'uri'

# Parse an atom:title, atom:summary, atom:content, or atom:rights element and
# return its contents as text; i.e., unencode that which is encoded, and
# return the rest as is.
#
# This processing model is described in sections 3.1.1 and 4.1.3 of
# the Atom Syndication Format. Most of the bits relevant to the subject
# at hand can be found at: (the URL is missing from this copy of the file).
#
# NOTE(review): the description above looks carried over from a sibling
# text-construct test. The code in this file resolves the href of an
# atom:link element against the xml:base in scope -- confirm and update.

# Add REXML::Element#getBaseURI unless something else already provides it.
# Method tables hold Symbols, so the lookup is done by Symbol; a String
# comparison against public_instance_methods never matches.
unless REXML::Element.public_method_defined?(:getBaseURI)
  class REXML::Element
    # Returns the base URI in effect for this node, as a String.
    #
    # A node carrying an xml:base attribute resolves that value against the
    # base URI of its parent. A node without xml:base inherits its parent's
    # base URI unchanged. A node with neither an xml:base attribute nor a
    # parent yields '' instead of failing on a nil parent.
    #
    # Errors raised by URI.join for unparsable values propagate to the caller.
    def getBaseURI
      inherited = parent ? parent.getBaseURI : ''
      own = attribute('xml:base')
      return inherited unless own

      value = own.value.to_s
      # URI.join needs something absolute to resolve against; when nothing is
      # inherited, hand the attribute value back untouched.
      return value if inherited.empty?

      URI.join(inherited, value).to_s
    end
  end
end

# TestText provides unit test cases for the base URI resolution implemented
# above (the class name appears to be carried over from the text-construct
# tests).
#
# One parse method is defined to take care of the small bit of administrivia
# necessary. The remainder are the actual test cases, and all begin with the
# string "test_". Each invokes the parse method with a given feed, and makes
# an assertion on what the expected results are.
class TestText < Test::Unit::TestCase
  # URI the feed is treated as having been retrieved from when a test does
  # not supply its own base.
  DEFAULT_BASE = "http://example.com/blog/index.atom".freeze

  # The parse method takes care of the following:
  #  * parsing the feed as XML
  #  * finding the first link element
  #  * extracting the href attribute from it
  #  * resolving the href
  #
  # feed - String holding the XML of the feed.
  # base - String URI used as the xml:base of the document node.
  #
  # Returns the resolved href as a String.
  # Raises ArgumentError when the feed has no atom:link element or the link
  # has no href attribute.
  def parse(feed, base = DEFAULT_BASE)
    doc = REXML::Document.new(feed)
    ns = { "atom" => "http://www.w3.org/2005/Atom" }
    link = REXML::XPath.first(doc, "//atom:link", ns)
    raise ArgumentError, "feed contains no atom:link element" unless link

    href = link.attribute('href')
    raise ArgumentError, "atom:link element has no href attribute" unless href

    # Seed the document node with the retrieval URI so that the xml:base
    # lookup always has a value to fall back on.
    doc.add_attribute('xml:base', base)
    URI.join(link.getBaseURI.to_s, href.value).to_s
  end

  # NOTE(review): the feed fixtures below were empty in this copy of the
  # file. They are minimal reconstructions chosen to match each test's name
  # and expected URL -- replace them with the original fixtures if available.

  # An absolute href is returned as is, regardless of the document base.
  def test_no_xml_base_abs
    assert_equal("http://example.org/archives/2005/08/123.html", parse(<<-END))
      <feed xmlns="http://www.w3.org/2005/Atom">
        <entry>
          <link href="http://example.org/archives/2005/08/123.html"/>
        </entry>
      </feed>
    END
  end

  # A relative href with no xml:base resolves against the document base.
  def test_no_xml_base_rel
    assert_equal("http://example.com/archives/2005/08/123.html", parse(<<-END))
      <feed xmlns="http://www.w3.org/2005/Atom">
        <entry>
          <link href="/archives/2005/08/123.html"/>
        </entry>
      </feed>
    END
  end

  # xml:base values on feed and entry are combined before resolving the href.
  def test_xml_base_on_feed_and_entry
    assert_equal("http://example.com/archives/2005/08/123.html", parse(<<-END))
      <feed xmlns="http://www.w3.org/2005/Atom" xml:base="/archives/">
        <entry xml:base="2005/08/">
          <link href="123.html"/>
        </entry>
      </feed>
    END
  end

  # An xml:base on the link itself takes part in the resolution last.
  def test_xml_base_overridden_on_link
    assert_equal("http://example.com/123.html", parse(<<-END))
      <feed xmlns="http://www.w3.org/2005/Atom" xml:base="/archives/">
        <entry xml:base="2005/08/">
          <link xml:base="/" href="123.html"/>
        </entry>
      </feed>
    END
  end
end