Index: test/test_blankslate.rb =================================================================== --- test/test_blankslate.rb (revision 110) +++ test/test_blankslate.rb (working copy) @@ -71,6 +71,13 @@ # Test case for blank slate. # class TestBlankSlate < Test::Unit::TestCase + if Object::const_defined?(:BasicObject) + def self.suite + # skip tests if :BasicObject is present + Test::Unit::TestSuite.new(name) + end + end + def setup @bs = BlankSlate.new end Index: test/test_xchar.rb =================================================================== --- test/test_xchar.rb (revision 110) +++ test/test_xchar.rb (working copy) @@ -3,7 +3,28 @@ require 'test/unit' require 'builder/xchar' +if String.method_defined?(:encode) + class String + ENCODING_BINARY = Encoding.find('BINARY') + + # shim method for testing purposes + def to_xs(escape=true) + raise NameError.new('to_xs') unless caller[0].index(__FILE__) + + result = Builder::XChar.encode(self) + if escape + result.gsub(/[^\u0000-\u007F]/) {|c| "&##{c.ord};"} + else + # really only useful for testing purposes + result.force_encoding(ENCODING_BINARY) + end + end + end +end + class TestXmlEscaping < Test::Unit::TestCase + REPLACEMENT_CHAR = Builder::XChar::REPLACEMENT_CHAR.to_xs + def test_ascii assert_equal 'abc', 'abc'.to_xs end @@ -15,9 +36,9 @@ end def test_invalid - assert_equal '*', "\x00".to_xs # null - assert_equal '*', "\x0C".to_xs # form feed - assert_equal '*', "\xEF\xBF\xBF".to_xs # U+FFFF + assert_equal REPLACEMENT_CHAR, "\x00".to_xs # null + assert_equal REPLACEMENT_CHAR, "\x0C".to_xs # form feed + assert_equal REPLACEMENT_CHAR, "\xEF\xBF\xBF".to_xs # U+FFFF end def test_iso_8859_1 Index: test/test_markupbuilder.rb =================================================================== --- test/test_markupbuilder.rb (revision 110) +++ test/test_markupbuilder.rb (working copy) @@ -118,7 +118,7 @@ end def test_ambiguous_markup - ex = assert_raises(ArgumentError) { + ex = assert_raise(ArgumentError) { @xml.h1("data1") { b } } assert_match /\btext\b/, ex.message @@ -228,6 +228,22 @@ assert_match //m, xml.target! end + def test_ensure + xml = Builder::XmlMarkup.new + xml.html do + xml.body do + begin + xml.p do + raise Exception.new('boom') + end + rescue Exception => e + xml.pre e + end + end + end + assert_match %r{

}, xml.target! + assert_match %r{

}, xml.target! + end end class TestDeclarations < Test::Unit::TestCase @@ -334,10 +350,10 @@ end def test_no_blocks - assert_raises(Builder::IllegalBlockError) do + assert_raise(Builder::IllegalBlockError) do @xml.instruct! { |x| x.hi } end - assert_raises(Builder::IllegalBlockError) do + assert_raise(Builder::IllegalBlockError) do @xml.comment!(:element) { |x| x.hi } end end @@ -378,58 +394,83 @@ end class TestUtfMarkup < Test::Unit::TestCase - def setup - @old_kcode = $KCODE - end + if ! String.method_defined?(:encode) + def setup + @old_kcode = $KCODE + end - def teardown - $KCODE = @old_kcode - end + def teardown + $KCODE = @old_kcode + end - def test_use_entities_if_no_encoding_is_given_and_kcode_is_none - $KCODE = 'NONE' - xml = Builder::XmlMarkup.new - xml.p("\xE2\x80\x99") - assert_match(%r(

), xml.target!) # + def test_use_entities_if_no_encoding_is_given_and_kcode_is_none + $KCODE = 'NONE' + xml = Builder::XmlMarkup.new + xml.p("\xE2\x80\x99") + assert_match(%r(

), xml.target!) # + end + + def test_use_entities_if_encoding_is_utf_but_kcode_is_not + $KCODE = 'NONE' + xml = Builder::XmlMarkup.new + xml.instruct!(:xml, :encoding => 'UTF-8') + xml.p("\xE2\x80\x99") + assert_match(%r(

), xml.target!) # + end + else + # change in behavior. As there is no $KCODE anymore, the default + # moves from "does not understand utf-8" to "supports utf-8". + + def test_use_entities_if_no_encoding_is_given_and_kcode_is_none + xml = Builder::XmlMarkup.new + xml.p("\xE2\x80\x99") + assert_match("

\u2019

", xml.target!) # + end + + def test_use_entities_if_encoding_is_utf_but_kcode_is_not + xml = Builder::XmlMarkup.new + xml.instruct!(:xml, :encoding => 'UTF-8') + xml.p("\xE2\x80\x99") + assert_match("

\u2019

", xml.target!) # + end end - def test_use_entities_if_encoding_is_utf_but_kcode_is_not - $KCODE = 'NONE' - xml = Builder::XmlMarkup.new - xml.instruct!(:xml, :encoding => 'UTF-8') - xml.p("\xE2\x80\x99") - assert_match(%r(

), xml.target!) # + def encode string, encoding + if !String.method_defined?(:encode) + $KCODE = encoding + string + elsif encoding == 'UTF8' + string.force_encoding('UTF-8') + else + string + end end def test_use_entities_if_kcode_is_utf_but_encoding_is_something_else - $KCODE = 'UTF8' xml = Builder::XmlMarkup.new xml.instruct!(:xml, :encoding => 'UTF-16') - xml.p("\xE2\x80\x99") + xml.p(encode("\xE2\x80\x99", 'UTF8')) assert_match(%r(

), xml.target!) # end def test_use_utf8_if_encoding_defaults_and_kcode_is_utf8 - $KCODE = 'UTF8' xml = Builder::XmlMarkup.new - xml.p("\xE2\x80\x99") - assert_equal "

\xE2\x80\x99

", xml.target! + xml.p(encode("\xE2\x80\x99",'UTF8')) + assert_equal encode("

\xE2\x80\x99

",'UTF8'), xml.target! end def test_use_utf8_if_both_encoding_and_kcode_are_utf8 - $KCODE = 'UTF8' xml = Builder::XmlMarkup.new xml.instruct!(:xml, :encoding => 'UTF-8') - xml.p("\xE2\x80\x99") - assert_match(%r(

\xE2\x80\x99

), xml.target!) + xml.p(encode("\xE2\x80\x99",'UTF8')) + assert_match encode("

\xE2\x80\x99

",'UTF8'), xml.target! end def test_use_utf8_if_both_encoding_and_kcode_are_utf8_with_lowercase - $KCODE = 'UTF8' xml = Builder::XmlMarkup.new xml.instruct!(:xml, :encoding => 'utf-8') - xml.p("\xE2\x80\x99") - assert_match(%r(

\xE2\x80\x99

), xml.target!) + xml.p(encode("\xE2\x80\x99",'UTF8')) + assert_match encode("

\xE2\x80\x99

",'UTF8'), xml.target! end end Index: Rakefile =================================================================== --- Rakefile (revision 110) +++ Rakefile (working copy) @@ -68,7 +68,7 @@ BLANKSLATE_FILES = FileList[ 'lib/blankslate.rb', - 'test/testblankslate.rb' + 'test/test_blankslate.rb' ] if ! defined?(Gem) Index: lib/builder/blankslate.rb =================================================================== --- lib/builder/blankslate.rb (revision 110) +++ lib/builder/blankslate.rb (working copy) @@ -8,13 +8,16 @@ # above copyright notice is included. #++ -require 'blankslate' - ###################################################################### # BlankSlate has been promoted to a top level name and is now # available as a standalone gem. We make the name available in the # Builder namespace for compatibility. # module Builder - BlankSlate = ::BlankSlate + if Object::const_defined?(:BasicObject) + BlankSlate = ::BasicObject + else + require 'blankslate' + BlankSlate = ::BlankSlate + end end Index: lib/builder/xmlmarkup.rb =================================================================== --- lib/builder/xmlmarkup.rb (revision 110) +++ lib/builder/xmlmarkup.rb (working copy) @@ -195,7 +195,7 @@ end def comment!(comment_text) - _ensure_no_block block_given? + _ensure_no_block ::Kernel::block_given? _special("", comment_text, nil) end @@ -210,13 +210,13 @@ @target << ""1.0", :encoding=>"UTF-8" } attrs = a.merge attrs @@ -261,7 +261,7 @@ # #=> # def cdata!(text) - _ensure_no_block block_given? + _ensure_no_block ::Kernel::block_given? _special("", text, nil) end @@ -313,7 +313,7 @@ def _attr_value(value) case value - when Symbol + when ::Symbol value.to_s else _escape_quote(value.to_s) @@ -322,8 +322,9 @@ def _ensure_no_block(got_block) if got_block - fail IllegalBlockError, - "Blocks are not allowed on XML instructions" + ::Kernel::raise IllegalBlockError.new( + "Blocks are not allowed on XML instructions" + ) end end Index: lib/builder/xchar.rb =================================================================== --- lib/builder/xchar.rb (revision 110) +++ lib/builder/xchar.rb (working copy) @@ -10,14 +10,14 @@ module Builder def self.check_for_name_collision(klass, method_name, defined_constant=nil) - if klass.instance_methods.include?(method_name.to_s) + if klass.method_defined?(method_name.to_s) fail RuntimeError, "Name Collision: Method '#{method_name}' is already defined in #{klass}" end end end -if ! defined?(Builder::XChar) +if ! defined?(Builder::XChar) and ! String.method_defined?(:encode) Builder.check_for_name_collision(String, "to_xs") Builder.check_for_name_collision(Fixnum, "xchr") end @@ -78,42 +78,120 @@ (0xE000..0xFFFD), (0x10000..0x10FFFF) ] + + # http://www.fileformat.info/info/unicode/char/fffd/index.htm + REPLACEMENT_CHAR = + if String.method_defined?(:encode) + "\uFFFD" + elsif $KCODE == 'UTF8' + "\xEF\xBF\xBD" + else + '*' + end end end -###################################################################### -# Enhance the Fixnum class with a XML escaped character conversion. -# -class Fixnum - XChar = Builder::XChar if ! defined?(XChar) +if String.method_defined?(:encode) + module Builder + module XChar # :nodoc: + CP1252_DIFFERENCES, UNICODE_EQUIVALENT = Builder::XChar::CP1252.each. + inject([[],[]]) {|(domain,range),(key,value)| + [domain << key,range << value] + }.map {|seq| seq.pack('U*').force_encoding('utf-8')} + + XML_PREDEFINED = Regexp.new('[' + + Builder::XChar::PREDEFINED.keys.pack('U*').force_encoding('utf-8') + + ']') + + INVALID_XML_CHAR = Regexp.new('[^'+ + Builder::XChar::VALID.map { |item| + case item + when Fixnum + [item].pack('U').force_encoding('utf-8') + when Range + [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8') + end + }.join + + ']') + + ENCODING_BINARY = Encoding.find('BINARY') + ENCODING_UTF8 = Encoding.find('UTF-8') + ENCODING_ISO1 = Encoding.find('ISO-8859-1') - # XML escaped version of chr. When escape is set to false - # the CP1252 fix is still applied but utf-8 characters are not - # converted to character entities. - def xchr(escape=true) - n = XChar::CP1252[self] || self - case n when *XChar::VALID - XChar::PREDEFINED[n] or (n<128 ? n.chr : (escape ? "&##{n};" : [n].pack('U*'))) - else - '*' + # convert a string to valid UTF-8, compensating for a number of + # common errors. + def XChar.unicode(string) + if string.encoding == ENCODING_BINARY + if string.ascii_only? + string + else + string = string.clone.force_encoding(ENCODING_UTF8) + if string.valid_encoding? + string + else + string.encode(ENCODING_UTF8, ENCODING_ISO1) + end + end + + elsif string.encoding == ENCODING_UTF8 + if string.valid_encoding? + string + else + string.encode(ENCODING_UTF8, ENCODING_ISO1) + end + + else + string.encode(ENCODING_UTF8) + end + end + + # encode a string per XML rules + def XChar.encode(string) + unicode(string). + tr(CP1252_DIFFERENCES, UNICODE_EQUIVALENT). + gsub(INVALID_XML_CHAR, REPLACEMENT_CHAR). + gsub(XML_PREDEFINED) {|c| PREDEFINED[c.ord]} + end end end -end +else -###################################################################### -# Enhance the String class with a XML escaped character version of -# to_s. -# -class String - # XML escaped version of to_s. When escape is set to false - # the CP1252 fix is still applied but utf-8 characters are not - # converted to character entities. - def to_xs(escape=true) - unpack('U*').map {|n| n.xchr(escape)}.join # ASCII, UTF-8 - rescue - unpack('C*').map {|n| n.xchr}.join # ISO-8859-1, WIN-1252 + ###################################################################### + # Enhance the Fixnum class with a XML escaped character conversion. + # + class Fixnum + XChar = Builder::XChar if ! defined?(XChar) + + # XML escaped version of chr. When escape is set to false + # the CP1252 fix is still applied but utf-8 characters are not + # converted to character entities. + def xchr(escape=true) + n = XChar::CP1252[self] || self + case n when *XChar::VALID + XChar::PREDEFINED[n] or + (n<128 ? n.chr : (escape ? "&##{n};" : [n].pack('U*'))) + else + Builder::XChar::REPLACEMENT_CHAR + end + end end + + + ###################################################################### + # Enhance the String class with a XML escaped character version of + # to_s. + # + class String + # XML escaped version of to_s. When escape is set to false + # the CP1252 fix is still applied but utf-8 characters are not + # converted to character entities. + def to_xs(escape=true) + unpack('U*').map {|n| n.xchr(escape)}.join # ASCII, UTF-8 + rescue + unpack('C*').map {|n| n.xchr}.join # ISO-8859-1, WIN-1252 + end + end end Index: lib/builder/css.rb =================================================================== --- lib/builder/css.rb (revision 110) +++ lib/builder/css.rb (working copy) @@ -136,14 +136,14 @@ end def id!(arg, &block) - _start_container('#'+arg.to_s, nil, block_given?) + _start_container('#'+arg.to_s, nil, ::Kernel.block_given?) _css_block(block) if block _unify_block self end def class!(arg, &block) - _start_container('.'+arg.to_s, nil, block_given?) + _start_container('.'+arg.to_s, nil, ::Kernel.block_given?) _css_block(block) if block _unify_block self @@ -169,7 +169,7 @@ end def method_missing(sym, *args, &block) - sym = "#{sym}:#{args.shift}" if args.first.kind_of?(Symbol) + sym = "#{sym}:#{args.shift}" if args.first.kind_of?(::Symbol) if block _start_container(sym, args.first) _css_block(block) Index: lib/builder/xmlbase.rb =================================================================== --- lib/builder/xmlbase.rb (revision 110) +++ lib/builder/xmlbase.rb (working copy) @@ -40,10 +40,10 @@ def method_missing(sym, *args, &block) text = nil attrs = nil - sym = "#{sym}:#{args.shift}" if args.first.kind_of?(Symbol) + sym = "#{sym}:#{args.shift}" if args.first.kind_of?(::Symbol) args.each do |arg| case arg - when Hash + when ::Hash attrs ||= {} attrs.merge!(arg) else @@ -53,15 +53,19 @@ end if block unless text.nil? - raise ArgumentError, "XmlMarkup cannot mix a text argument with a block" + ::Kernel::raise ::ArgumentError, + "XmlMarkup cannot mix a text argument with a block" end _indent _start_tag(sym, attrs) _newline - _nested_structures(block) - _indent - _end_tag(sym) - _newline + begin + _nested_structures(block) + ensure + _indent + _end_tag(sym) + _newline + end elsif text.nil? _indent _start_tag(sym, attrs, true) @@ -114,8 +118,22 @@ private require 'builder/xchar' - def _escape(text) - text.to_xs((@encoding != 'utf-8' or $KCODE != 'UTF8')) + if ::String.method_defined?(:encode) + def _escape(text) + result = XChar.encode(text) + begin + result.encode(@encoding) + rescue + # if the encoding can't be supported, use numeric character references + result. + gsub(/[^\u0000-\u007F]/) {|c| "&##{c.ord};"}. + force_encoding('ascii') + end + end + else + def _escape(text) + text.to_xs((@encoding != 'utf-8' or $KCODE != 'UTF8')) + end end def _escape_quote(text)