#!/usr/bin/ruby -w require 'json' $KCODE = 'UTF8' # http://www.tbray.org/ongoing/When/200x/2003/04/26/UTF FAIHU="\xf0\x90\x8d\x86" # simple test to see if FAIHU can round-trip a conversion to JSON # note: what the second line of output actually displays may depend on # your operating system's LANG/code-page and what fonts you have installed def faihu_test puts " " + JSON::unparse(FAIHU) puts " " + JSON::parse(JSON::unparse(FAIHU)).inspect puts " " + (JSON::parse(JSON::unparse(FAIHU)) == FAIHU).to_s rescue puts " #{$!.class}: #{$!}" end puts "\nBefore:"; faihu_test # Warning: monkey-patch ahead! module JSON class Parser < StringScanner # modify Regexp to also grab consecutive sequences of Unicode escape # sequences alias parse_string_BMP parse_string def parse_string if scan(STRING) return '' if self[1].empty? self[1].gsub(%r(\\(?:[\\bfnrt"/]|u([A-Fa-f\d]{4}(\\u[A-Fa-f\d]{4})*)))) do case $~[0] when '\\\\' then '\\' when '\\b' then "\b" when '\\f' then "\f" when '\\n' then "\n" when '\\r' then "\r" when '\\t' then "\t" when '\\"' then '"' when '\\/' then '/' else if JSON.support_unicode? and $KCODE == 'UTF8' JSON.utf16_to_utf8($~[1]) else # if utf8 mode is switched off or unicode not supported, try to # transform unicode \u-notation to bytes directly: $~[1].to_i(16).chr end end end end end end module_function # modify output to insert "\u" after every fourth character alias utf8_to_utf16_BMP utf8_to_utf16 def utf8_to_utf16(string) bytes = JSON::UTF8toUTF16.iconv(string).unpack('H*')[0] bytes = bytes.scan(/..../n).join('\u') if bytes.length > 4 bytes end # handle consecutive strings of unicode escape sequences alias utf16_to_utf8_BMP utf16_to_utf8 def utf16_to_utf8(string) if string.length == 4 bytes = '' << string[0, 2].to_i(16) << string[2, 2].to_i(16) else bytes = string.scan(/../).reject{|c| c=='\u'}. map{|c| c.to_i(16)}.inject('') {|s,n| s<
# NOTE(review): everything above sits on the single `#!` line, so Ruby reads
# the whole script as one comment and none of it executes. The line breaks
# appear to have been lost; they must be restored before this file can do
# anything.
#
# What the collapsed text contains, in order:
#   * require 'json', $KCODE = 'UTF8', and the FAIHU constant (the four bytes
#     \xf0\x90\x8d\x86 written as a double-quoted string).
#   * faihu_test: prints JSON::unparse(FAIHU), then the inspected result of
#     parsing that output back, then whether the parsed value equals FAIHU.
#     If an error is rescued, it prints "ClassName: message" instead.
#   * A "Before:" run of faihu_test.
#   * A monkey-patch of module JSON:
#       - JSON::Parser#parse_string is aliased to parse_string_BMP and
#         redefined with a regexp that also matches runs of consecutive
#         \uXXXX escapes, passing the captured hex run to JSON.utf16_to_utf8
#         when JSON.support_unicode? is true and $KCODE == 'UTF8'.
#       - After module_function, utf8_to_utf16 and utf16_to_utf8 are aliased
#         to *_BMP names and redefined; utf8_to_utf16 joins each group of four
#         hex digits with '\u' when the hex string is longer than four chars.
#
# NOTE(review): the text stops mid-expression at `inject('') {|s,n| s<`, inside
# utf16_to_utf8. The rest of that method, the closing `end`s, and anything
# that followed are not present here; recover them from the original source
# rather than reconstructing them by guesswork.