#!/usr/bin/ruby -w

require 'json'
# the patched JSON::Parser#parse_string further down only takes its UTF-16
# decoding path when $KCODE is exactly 'UTF8'
# NOTE(review): $KCODE is a Ruby 1.8 setting -- confirm which interpreter this
# script is meant to run on
$KCODE = 'UTF8'

# U+10346 GOTHIC LETTER FAIHU, written as its four raw UTF-8 bytes. The code
# point lies outside the Basic Multilingual Plane, so UTF-16 needs two code
# units (d800 df46) to represent it.
# Background: http://www.tbray.org/ongoing/When/200x/2003/04/26/UTF
FAIHU="\xf0\x90\x8d\x86"

# Simple test to see whether FAIHU can round-trip a conversion to JSON.
#
# Prints three indented lines: the JSON text, the inspected result of parsing
# that text back, and "true"/"false" for whether the result equals FAIHU.
# If any step raises, the remaining lines are skipped and a single
# "  <ExceptionClass>: <message>" line is printed instead.
#
# Note: what the second line of output actually displays may depend on your
# operating system's LANG/code-page and what fonts you have installed.
def faihu_test
  # serialise and parse once, then report on the stored results
  encoded = JSON::unparse(FAIHU)
  puts "  " + encoded
  decoded = JSON::parse(encoded)
  puts "  " + decoded.inspect
  puts "  " + (decoded == FAIHU).to_s
rescue => e
  puts "  #{e.class}: #{e}"
end

# baseline: run the round-trip check before the JSON module is patched below
puts "\nBefore:"; faihu_test

# Warning: monkey-patch ahead!
module JSON

  class Parser < StringScanner
    # Replacement string parser. Compared with a single-escape pattern, the
    # Regexp carries a trailing (\\u[A-Fa-f\d]{4})* group, so a run of
    # consecutive \uXXXX escapes arrives as ONE match. That lets the two halves
    # of a UTF-16 surrogate pair be converted together instead of one at a time.
    #
    # The method being replaced is kept under the name parse_string_BMP.
    #
    # Returns the unescaped string, '' when the first capture of STRING is
    # empty, or nil when STRING does not match at the current scan position.
    alias parse_string_BMP parse_string
    def parse_string
      return unless scan(STRING)
      return '' if self[1].empty?
      self[1].gsub(%r(\\(?:[\\bfnrt"/]|u([A-Fa-f\d]{4}(\\u[A-Fa-f\d]{4})*)))) do |escape|
        case escape
        when '\\\\' then '\\'
        when '\\b'  then "\b"
        when '\\f'  then "\f"
        when '\\n'  then "\n"
        when '\\r'  then "\r"
        when '\\t'  then "\t"
        when '\\"'  then '"'
        when '\\/'  then '/'
        else
          # $1 is the whole run without its leading "\u", e.g. d800\udf46
          if JSON.support_unicode? && $KCODE == 'UTF8'
            JSON.utf16_to_utf8($1)
          else
            # if utf8 mode is switched off or unicode not supported, try to
            # transform unicode \u-notation to bytes directly. The run may hold
            # several escapes, so each 4-digit unit is decoded on its own;
            # calling to_i(16) on the whole run would stop at the first
            # backslash and silently drop every escape after the first.
            $1.split('\u').map { |unit| unit.to_i(16).chr }.join
          end
        end
      end
    end
  end

  module_function

  # modify output to insert "\u" between every group of four hex digits, so
  # that each 16-bit unit gets its own escape; the method being replaced is
  # kept under the name utf8_to_utf16_BMP
  # NOTE(review): aliasing first presumably also avoids the "method redefined"
  # warning under -w -- confirm
  alias utf8_to_utf16_BMP utf8_to_utf16
  # Runs +string+ through JSON::UTF8toUTF16 and returns the converted bytes as
  # lowercase hex. When the result is longer than one 4-digit group, the
  # groups are separated by a literal backslash-u (no leading "\u" is added
  # here -- presumably the caller supplies it; verify against the generator).
  def utf8_to_utf16(string)
    hex = JSON::UTF8toUTF16.iconv(string).unpack('H*').first
    # a single group (or less) needs no separator
    return hex unless hex.length > 4
    hex.scan(/..../n).join('\u')
  end

  # handle consecutive runs of unicode escape sequences (input such as
  # d800\udf46 rather than a single 4-digit unit); the method being replaced
  # is kept under the name utf16_to_utf8_BMP
  alias utf16_to_utf8_BMP utf16_to_utf8
  # Converts a run of hex-encoded 16-bit units to whatever JSON::UTF16toUTF8
  # produces for the corresponding bytes.
  #
  # string - 4 hex digits, optionally followed by further groups that are each
  #          introduced by a literal backslash-u, e.g. "00e9" or "d800\udf46".
  #
  # Returns the result of JSON::UTF16toUTF8.iconv on the decoded bytes.
  def utf16_to_utf8(string)
    # Drop the "\u" separators, then let pack turn every pair of hex digits
    # into exactly one byte. Building the buffer with String#<< Integer is not
    # byte-safe: on Ruby >= 1.9 it appends a code point in the receiver's
    # encoding, so any byte >= 0x80 would be mangled.
    hex = string.gsub('\u', '')
    JSON::UTF16toUTF8.iconv([hex].pack('H*'))
  end

end

# same round-trip check again, now going through the methods redefined in
# the JSON module above
puts "\nAfter:"; faihu_test
