603 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			603 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| # encoding: UTF-8
 | |
| #
 | |
| # Copyright 2011, 2012 Keith Rarick
 | |
| #
 | |
| # Permission is hereby granted, free of charge, to any person obtaining a copy
 | |
| # of this software and associated documentation files (the "Software"), to deal
 | |
| # in the Software without restriction, including without limitation the rights
 | |
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | |
| # copies of the Software, and to permit persons to whom the Software is
 | |
| # furnished to do so, subject to the following conditions:
 | |
| #
 | |
| # The above copyright notice and this permission notice shall be included in
 | |
| # all copies or substantial portions of the Software.
 | |
| #
 | |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | |
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | |
| # THE SOFTWARE.
 | |
| 
 | |
| # See https://github.com/kr/okjson for updates.
 | |
| 
 | |
| require 'stringio'
 | |
| 
 | |
| # Some parts adapted from
 | |
| # http://golang.org/src/pkg/json/decode.go and
 | |
| # http://golang.org/src/pkg/utf8/utf8.go
 | |
| module Vendor
 | |
|   module OkJson
 | |
|     Upstream = '43'
 | |
|     extend self
 | |
| 
 | |
| 
 | |
|     # Decodes a json document in string s and
 | |
|     # returns the corresponding ruby value.
 | |
|     # String s must be valid UTF-8. If you have
 | |
|     # a string in some other encoding, convert
 | |
|     # it first.
 | |
|     #
 | |
|     # String values in the resulting structure
 | |
|     # will be UTF-8.
 | |
|     def decode(s)
 | |
|       ts = lex(s)
 | |
|       v, ts = textparse(ts)
 | |
|       if ts.length > 0
 | |
|         raise Error, 'trailing garbage'
 | |
|       end
 | |
|       v
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Encodes x into a json text. It may contain only
 | |
|     # Array, Hash, String, Numeric, true, false, nil.
 | |
|     # (Note, this list excludes Symbol.)
 | |
|     # X itself must be an Array or a Hash.
 | |
|     # No other value can be encoded, and an error will
 | |
|     # be raised if x contains any other value, such as
 | |
|     # Nan, Infinity, Symbol, and Proc, or if a Hash key
 | |
|     # is not a String.
 | |
|     # Strings contained in x must be valid UTF-8.
 | |
|     def encode(x)
 | |
|       case x
 | |
|       when Hash    then objenc(x)
 | |
|       when Array   then arrenc(x)
 | |
|       else
 | |
|         raise Error, 'root value must be an Array or a Hash'
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def valenc(x)
 | |
|       case x
 | |
|       when Hash    then objenc(x)
 | |
|       when Array   then arrenc(x)
 | |
|       when String  then strenc(x)
 | |
|       when Numeric then numenc(x)
 | |
|       when true    then "true"
 | |
|       when false   then "false"
 | |
|       when nil     then "null"
 | |
|       else
 | |
|         raise Error, "cannot encode #{x.class}: #{x.inspect}"
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|   private
 | |
| 
 | |
| 
 | |
|     # Parses a "json text" in the sense of RFC 4627.
 | |
|     # Returns the parsed value and any trailing tokens.
 | |
|     # Note: this is almost the same as valparse,
 | |
|     # except that it does not accept atomic values.
 | |
|     def textparse(ts)
 | |
|       if ts.length <= 0
 | |
|         raise Error, 'empty'
 | |
|       end
 | |
| 
 | |
|       typ, _, val = ts[0]
 | |
|       case typ
 | |
|       when '{' then objparse(ts)
 | |
|       when '[' then arrparse(ts)
 | |
|       else
 | |
|         raise Error, "unexpected #{val.inspect}"
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Parses a "value" in the sense of RFC 4627.
 | |
|     # Returns the parsed value and any trailing tokens.
 | |
|     def valparse(ts)
 | |
|       if ts.length <= 0
 | |
|         raise Error, 'empty'
 | |
|       end
 | |
| 
 | |
|       typ, _, val = ts[0]
 | |
|       case typ
 | |
|       when '{' then objparse(ts)
 | |
|       when '[' then arrparse(ts)
 | |
|       when :val,:str then [val, ts[1..-1]]
 | |
|       else
 | |
|         raise Error, "unexpected #{val.inspect}"
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Parses an "object" in the sense of RFC 4627.
 | |
|     # Returns the parsed value and any trailing tokens.
 | |
|     def objparse(ts)
 | |
|       ts = eat('{', ts)
 | |
|       obj = {}
 | |
| 
 | |
|       if ts[0][0] == '}'
 | |
|         return obj, ts[1..-1]
 | |
|       end
 | |
| 
 | |
|       k, v, ts = pairparse(ts)
 | |
|       obj[k] = v
 | |
| 
 | |
|       if ts[0][0] == '}'
 | |
|         return obj, ts[1..-1]
 | |
|       end
 | |
| 
 | |
|       loop do
 | |
|         ts = eat(',', ts)
 | |
| 
 | |
|         k, v, ts = pairparse(ts)
 | |
|         obj[k] = v
 | |
| 
 | |
|         if ts[0][0] == '}'
 | |
|           return obj, ts[1..-1]
 | |
|         end
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Parses a "member" in the sense of RFC 4627.
 | |
|     # Returns the parsed values and any trailing tokens.
 | |
|     def pairparse(ts)
 | |
|       (typ, _, k), ts = ts[0], ts[1..-1]
 | |
|       if typ != :str
 | |
|         raise Error, "unexpected #{k.inspect}"
 | |
|       end
 | |
|       ts = eat(':', ts)
 | |
|       v, ts = valparse(ts)
 | |
|       [k, v, ts]
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Parses an "array" in the sense of RFC 4627.
 | |
|     # Returns the parsed value and any trailing tokens.
 | |
|     def arrparse(ts)
 | |
|       ts = eat('[', ts)
 | |
|       arr = []
 | |
| 
 | |
|       if ts[0][0] == ']'
 | |
|         return arr, ts[1..-1]
 | |
|       end
 | |
| 
 | |
|       v, ts = valparse(ts)
 | |
|       arr << v
 | |
| 
 | |
|       if ts[0][0] == ']'
 | |
|         return arr, ts[1..-1]
 | |
|       end
 | |
| 
 | |
|       loop do
 | |
|         ts = eat(',', ts)
 | |
| 
 | |
|         v, ts = valparse(ts)
 | |
|         arr << v
 | |
| 
 | |
|         if ts[0][0] == ']'
 | |
|           return arr, ts[1..-1]
 | |
|         end
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def eat(typ, ts)
 | |
|       if ts[0][0] != typ
 | |
|         raise Error, "expected #{typ} (got #{ts[0].inspect})"
 | |
|       end
 | |
|       ts[1..-1]
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Scans s and returns a list of json tokens,
 | |
|     # excluding white space (as defined in RFC 4627).
 | |
|     def lex(s)
 | |
|       ts = []
 | |
|       while s.length > 0
 | |
|         typ, lexeme, val = tok(s)
 | |
|         if typ == nil
 | |
|           raise Error, "invalid character at #{s[0,10].inspect}"
 | |
|         end
 | |
|         if typ != :space
 | |
|           ts << [typ, lexeme, val]
 | |
|         end
 | |
|         s = s[lexeme.length..-1]
 | |
|       end
 | |
|       ts
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Scans the first token in s and
 | |
|     # returns a 3-element list, or nil
 | |
|     # if s does not begin with a valid token.
 | |
|     #
 | |
|     # The first list element is one of
 | |
|     # '{', '}', ':', ',', '[', ']',
 | |
|     # :val, :str, and :space.
 | |
|     #
 | |
|     # The second element is the lexeme.
 | |
|     #
 | |
|     # The third element is the value of the
 | |
|     # token for :val and :str, otherwise
 | |
|     # it is the lexeme.
 | |
|     def tok(s)
 | |
|       case s[0]
 | |
|       when ?{ then ['{', s[0,1], s[0,1]]
 | |
|       when ?} then ['}', s[0,1], s[0,1]]
 | |
|       when ?: then [':', s[0,1], s[0,1]]
 | |
|       when ?, then [',', s[0,1], s[0,1]]
 | |
|       when ?[ then ['[', s[0,1], s[0,1]]
 | |
|       when ?] then [']', s[0,1], s[0,1]]
 | |
|       when ?n then nulltok(s)
 | |
|       when ?t then truetok(s)
 | |
|       when ?f then falsetok(s)
 | |
|       when ?" then strtok(s)
 | |
|       when Spc, ?\t, ?\n, ?\r then [:space, s[0,1], s[0,1]]
 | |
|       else
 | |
|         numtok(s)
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def nulltok(s);  s[0,4] == 'null'  ? [:val, 'null',  nil]   : [] end
 | |
|     def truetok(s);  s[0,4] == 'true'  ? [:val, 'true',  true]  : [] end
 | |
|     def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end
 | |
| 
 | |
| 
 | |
|     def numtok(s)
 | |
|       m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
 | |
|       if m && m.begin(0) == 0
 | |
|         if !m[2] && !m[3]
 | |
|           [:val, m[0], Integer(m[0])]
 | |
|         elsif m[2]
 | |
|           [:val, m[0], Float(m[0])]
 | |
|         else
 | |
|           [:val, m[0], Integer(m[1])*(10**Integer(m[3][1..-1]))]
 | |
|         end
 | |
|       else
 | |
|         []
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def strtok(s)
 | |
|       m = /"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
 | |
|       if ! m
 | |
|         raise Error, "invalid string literal at #{abbrev(s)}"
 | |
|       end
 | |
|       [:str, m[0], unquote(m[0])]
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def abbrev(s)
 | |
|       t = s[0,10]
 | |
|       p = t['`']
 | |
|       t = t[0,p] if p
 | |
|       t = t + '...' if t.length < s.length
 | |
|       '`' + t + '`'
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Converts a quoted json string literal q into a UTF-8-encoded string.
 | |
|     # The rules are different than for Ruby, so we cannot use eval.
 | |
|     # Unquote will raise an error if q contains control characters.
 | |
|     def unquote(q)
 | |
|       q = q[1...-1]
 | |
|       a = q.dup # allocate a big enough string
 | |
|       # In ruby >= 1.9, a[w] is a codepoint, not a byte.
 | |
|       if rubydoesenc?
 | |
|         a.force_encoding('UTF-8')
 | |
|       end
 | |
|       r, w = 0, 0
 | |
|       while r < q.length
 | |
|         c = q[r]
 | |
|         if c == ?\\
 | |
|           r += 1
 | |
|           if r >= q.length
 | |
|             raise Error, "string literal ends with a \"\\\": \"#{q}\""
 | |
|           end
 | |
| 
 | |
|           case q[r]
 | |
|           when ?",?\\,?/,?'
 | |
|             a[w] = q[r]
 | |
|             r += 1
 | |
|             w += 1
 | |
|           when ?b,?f,?n,?r,?t
 | |
|             a[w] = Unesc[q[r]]
 | |
|             r += 1
 | |
|             w += 1
 | |
|           when ?u
 | |
|             r += 1
 | |
|             uchar = begin
 | |
|               hexdec4(q[r,4])
 | |
|             rescue RuntimeError => e
 | |
|               raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
 | |
|             end
 | |
|             r += 4
 | |
|             if surrogate? uchar
 | |
|               if q.length >= r+6
 | |
|                 uchar1 = hexdec4(q[r+2,4])
 | |
|                 uchar = subst(uchar, uchar1)
 | |
|                 if uchar != Ucharerr
 | |
|                   # A valid pair; consume.
 | |
|                   r += 6
 | |
|                 end
 | |
|               end
 | |
|             end
 | |
|             if rubydoesenc?
 | |
|               a[w] = '' << uchar
 | |
|               w += 1
 | |
|             else
 | |
|               w += ucharenc(a, w, uchar)
 | |
|             end
 | |
|           else
 | |
|             raise Error, "invalid escape char #{q[r]} in \"#{q}\""
 | |
|           end
 | |
|         elsif c == ?" || c < Spc
 | |
|           raise Error, "invalid character in string literal \"#{q}\""
 | |
|         else
 | |
|           # Copy anything else byte-for-byte.
 | |
|           # Valid UTF-8 will remain valid UTF-8.
 | |
|           # Invalid UTF-8 will remain invalid UTF-8.
 | |
|           # In ruby >= 1.9, c is a codepoint, not a byte,
 | |
|           # in which case this is still what we want.
 | |
|           a[w] = c
 | |
|           r += 1
 | |
|           w += 1
 | |
|         end
 | |
|       end
 | |
|       a[0,w]
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Encodes unicode character u as UTF-8
 | |
|     # bytes in string a at position i.
 | |
|     # Returns the number of bytes written.
 | |
|     def ucharenc(a, i, u)
 | |
|       if u <= Uchar1max
 | |
|         a[i] = (u & 0xff).chr
 | |
|         1
 | |
|       elsif u <= Uchar2max
 | |
|         a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
 | |
|         a[i+1] = (Utagx | (u&Umaskx)).chr
 | |
|         2
 | |
|       elsif u <= Uchar3max
 | |
|         a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
 | |
|         a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
 | |
|         a[i+2] = (Utagx | (u&Umaskx)).chr
 | |
|         3
 | |
|       else
 | |
|         a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
 | |
|         a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
 | |
|         a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
 | |
|         a[i+3] = (Utagx | (u&Umaskx)).chr
 | |
|         4
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def hexdec4(s)
 | |
|       if s.length != 4
 | |
|         raise Error, 'short'
 | |
|       end
 | |
|       (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def subst(u1, u2)
 | |
|       if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
 | |
|         return ((u1-Usurr1)<<10) | (u2-Usurr2) + Usurrself
 | |
|       end
 | |
|       return Ucharerr
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def surrogate?(u)
 | |
|       Usurr1 <= u && u < Usurr3
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def nibble(c)
 | |
|       if ?0 <= c && c <= ?9 then c.ord - ?0.ord
 | |
|       elsif ?a <= c && c <= ?z then c.ord - ?a.ord + 10
 | |
|       elsif ?A <= c && c <= ?Z then c.ord - ?A.ord + 10
 | |
|       else
 | |
|         raise Error, "invalid hex code #{c}"
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def objenc(x)
 | |
|       '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}'
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def arrenc(a)
 | |
|       '[' + a.map{|x| valenc(x)}.join(',') + ']'
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def keyenc(k)
 | |
|       case k
 | |
|       when String then strenc(k)
 | |
|       else
 | |
|         raise Error, "Hash key is not a string: #{k.inspect}"
 | |
|       end
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def strenc(s)
 | |
|       t = StringIO.new
 | |
|       t.putc(?")
 | |
|       r = 0
 | |
| 
 | |
|       while r < s.length
 | |
|         case s[r]
 | |
|         when ?"  then t.print('\\"')
 | |
|         when ?\\ then t.print('\\\\')
 | |
|         when ?\b then t.print('\\b')
 | |
|         when ?\f then t.print('\\f')
 | |
|         when ?\n then t.print('\\n')
 | |
|         when ?\r then t.print('\\r')
 | |
|         when ?\t then t.print('\\t')
 | |
|         else
 | |
|           c = s[r]
 | |
|           # In ruby >= 1.9, s[r] is a codepoint, not a byte.
 | |
|           if rubydoesenc?
 | |
|             begin
 | |
|               # c.ord will raise an error if c is invalid UTF-8
 | |
|               if c.ord < Spc.ord
 | |
|                 c = "\\u%04x" % [c.ord]
 | |
|               end
 | |
|               t.write(c)
 | |
|             rescue
 | |
|               t.write(Ustrerr)
 | |
|             end
 | |
|           elsif c < Spc
 | |
|             t.write("\\u%04x" % c)
 | |
|           elsif Spc <= c && c <= ?~
 | |
|             t.putc(c)
 | |
|           else
 | |
|             n = ucharcopy(t, s, r) # ensure valid UTF-8 output
 | |
|             r += n - 1 # r is incremented below
 | |
|           end
 | |
|         end
 | |
|         r += 1
 | |
|       end
 | |
|       t.putc(?")
 | |
|       t.string
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def numenc(x)
 | |
|       if ((x.nan? || x.infinite?) rescue false)
 | |
|         raise Error, "Numeric cannot be represented: #{x}"
 | |
|       end
 | |
|       "#{x}"
 | |
|     end
 | |
| 
 | |
| 
 | |
|     # Copies the valid UTF-8 bytes of a single character
 | |
|     # from string s at position i to I/O object t, and
 | |
|     # returns the number of bytes copied.
 | |
|     # If no valid UTF-8 char exists at position i,
 | |
|     # ucharcopy writes Ustrerr and returns 1.
 | |
|     def ucharcopy(t, s, i)
 | |
|       n = s.length - i
 | |
|       raise Utf8Error if n < 1
 | |
| 
 | |
|       c0 = s[i].ord
 | |
| 
 | |
|       # 1-byte, 7-bit sequence?
 | |
|       if c0 < Utagx
 | |
|         t.putc(c0)
 | |
|         return 1
 | |
|       end
 | |
| 
 | |
|       raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
 | |
| 
 | |
|       raise Utf8Error if n < 2 # need continuation byte
 | |
|       c1 = s[i+1].ord
 | |
|       raise Utf8Error if c1 < Utagx || Utag2 <= c1
 | |
| 
 | |
|       # 2-byte, 11-bit sequence?
 | |
|       if c0 < Utag3
 | |
|         raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
 | |
|         t.putc(c0)
 | |
|         t.putc(c1)
 | |
|         return 2
 | |
|       end
 | |
| 
 | |
|       # need second continuation byte
 | |
|       raise Utf8Error if n < 3
 | |
| 
 | |
|       c2 = s[i+2].ord
 | |
|       raise Utf8Error if c2 < Utagx || Utag2 <= c2
 | |
| 
 | |
|       # 3-byte, 16-bit sequence?
 | |
|       if c0 < Utag4
 | |
|         u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
 | |
|         raise Utf8Error if u <= Uchar2max
 | |
|         t.putc(c0)
 | |
|         t.putc(c1)
 | |
|         t.putc(c2)
 | |
|         return 3
 | |
|       end
 | |
| 
 | |
|       # need third continuation byte
 | |
|       raise Utf8Error if n < 4
 | |
|       c3 = s[i+3].ord
 | |
|       raise Utf8Error if c3 < Utagx || Utag2 <= c3
 | |
| 
 | |
|       # 4-byte, 21-bit sequence?
 | |
|       if c0 < Utag5
 | |
|         u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
 | |
|         raise Utf8Error if u <= Uchar3max
 | |
|         t.putc(c0)
 | |
|         t.putc(c1)
 | |
|         t.putc(c2)
 | |
|         t.putc(c3)
 | |
|         return 4
 | |
|       end
 | |
| 
 | |
|       raise Utf8Error
 | |
|     rescue Utf8Error
 | |
|       t.write(Ustrerr)
 | |
|       return 1
 | |
|     end
 | |
| 
 | |
| 
 | |
|     def rubydoesenc?
 | |
|       ::String.method_defined?(:force_encoding)
 | |
|     end
 | |
| 
 | |
| 
 | |
|     class Utf8Error < ::StandardError
 | |
|     end
 | |
| 
 | |
| 
 | |
|     class Error < ::StandardError
 | |
|     end
 | |
| 
 | |
| 
 | |
|     Utagx = 0b1000_0000
 | |
|     Utag2 = 0b1100_0000
 | |
|     Utag3 = 0b1110_0000
 | |
|     Utag4 = 0b1111_0000
 | |
|     Utag5 = 0b1111_1000
 | |
|     Umaskx = 0b0011_1111
 | |
|     Umask2 = 0b0001_1111
 | |
|     Umask3 = 0b0000_1111
 | |
|     Umask4 = 0b0000_0111
 | |
|     Uchar1max = (1<<7) - 1
 | |
|     Uchar2max = (1<<11) - 1
 | |
|     Uchar3max = (1<<16) - 1
 | |
|     Ucharerr = 0xFFFD # unicode "replacement char"
 | |
|     Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
 | |
|     Usurrself = 0x10000
 | |
|     Usurr1 = 0xd800
 | |
|     Usurr2 = 0xdc00
 | |
|     Usurr3 = 0xe000
 | |
| 
 | |
|     Spc = ' '[0]
 | |
|     Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}
 | |
|   end
 | |
| end
 | 
