--- /dev/null
+# Copyright (c) 2010 Nicira Networks
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import StringIO
+import sys
+
+# Translation table used when serializing strings: maps a code point to the
+# JSON escape sequence that is written in its place.
+#
+# Every control character below U+0020 first gets a generic \uXXXX escape;
+# the short two-character forms then replace the generic entry where one
+# exists, and the double quote and backslash are added on top.
+escapes = {}
+for i in range(32):
+    escapes[i] = u"\\u%04x" % i
+escapes.update({ord("\b"): u"\\b",
+                ord("\t"): u"\\t",
+                ord("\n"): u"\\n",
+                ord("\f"): u"\\f",
+                ord("\r"): u"\\r",
+                ord('"'): u"\\\"",
+                ord("\\"): u"\\\\"})
+
+def __dump_string(stream, s):
+    """Writes 's' to 'stream' as a double-quoted JSON string.
+
+    Each character that has an entry in the module-level 'escapes' table is
+    replaced by that entry; every other character is written unchanged."""
+    quote = u"\""
+    stream.write(quote)
+    for ch in s:
+        # Fall back to the character itself when no escape is registered.
+        stream.write(escapes.get(ord(ch), ch))
+    stream.write(quote)
+
+def to_stream(obj, stream, pretty=False, sort_keys=True):
+    """Serializes 'obj' as JSON text and writes it to 'stream'.
+
+    'obj' may be None, True, False, an int or long, a float, a str or
+    unicode string, or a dict, list, or tuple whose members are themselves
+    serializable.  Types are matched exactly, so subclasses are rejected.
+    Dict keys are converted with unicode() and written as JSON strings.
+
+    If 'sort_keys' is true, the members of each object are written in sorted
+    order; otherwise they are written in dict iteration order.
+
+    'pretty' is accepted and passed down to nested values, but it does not
+    currently change the output.
+
+    Every piece of output is written as a unicode string, so 'stream' may be
+    a text stream that rejects byte strings.
+
+    A value of any other type reaches the final branch, which raises
+    Error."""
+    if obj is None:
+        stream.write(u"null")
+    elif obj is False:
+        stream.write(u"false")
+    elif obj is True:
+        stream.write(u"true")
+    elif type(obj) in (int, long):
+        stream.write(u"%d" % obj)
+    elif type(obj) == float:
+        # Format with a unicode template like every other branch; a byte
+        # string here makes unicode-only streams fail on floats.
+        stream.write(u"%.15g" % obj)
+    elif type(obj) == unicode:
+        __dump_string(stream, obj)
+    elif type(obj) == str:
+        __dump_string(stream, unicode(obj))
+    elif type(obj) == dict:
+        stream.write(u"{")
+        if sort_keys:
+            items = sorted(obj.items())
+        else:
+            items = obj.iteritems()
+        for i, (key, value) in enumerate(items):
+            if i > 0:
+                stream.write(u",")
+            __dump_string(stream, unicode(key))
+            stream.write(u":")
+            to_stream(value, stream, pretty, sort_keys)
+        stream.write(u"}")
+    elif type(obj) in (list, tuple):
+        stream.write(u"[")
+        for i, value in enumerate(obj):
+            if i > 0:
+                stream.write(u",")
+            to_stream(value, stream, pretty, sort_keys)
+        stream.write(u"]")
+    else:
+        # NOTE(review): 'Error' is not defined or imported in the visible
+        # part of this file -- confirm where it is supposed to come from.
+        raise Error("can't serialize %s as JSON" % obj)
+
+def to_file(obj, name, pretty=False, sort_keys=True):
+    """Serializes 'obj' as JSON into the file named 'name'.
+
+    The file is opened in mode "w" and is closed again even if serialization
+    raises an exception.  'pretty' and 'sort_keys' are passed through to
+    to_stream()."""
+    out = open(name, "w")
+    try:
+        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
+    finally:
+        out.close()
+
+def to_string(obj, pretty=False, sort_keys=True):
+    """Returns 'obj' serialized as JSON text.
+
+    The text is produced by to_stream() writing into an in-memory buffer;
+    'pretty' and 'sort_keys' are passed through to it."""
+    buf = StringIO.StringIO()
+    to_stream(obj, buf, pretty=pretty, sort_keys=sort_keys)
+    text = buf.getvalue()
+    buf.close()
+    return text
+
+def from_stream(stream):
+    """Parses the JSON text read from 'stream'.
+
+    Input is read in chunks of up to 4096 and fed to a Parser created with
+    check_trailer=True.  Reading stops at end of input or as soon as the
+    parser declines to consume a whole chunk.  Returns whatever
+    Parser.finish() returns."""
+    parser = Parser(check_trailer=True)
+    chunk = stream.read(4096)
+    while chunk != "" and parser.feed(chunk) == len(chunk):
+        chunk = stream.read(4096)
+    return parser.finish()
+
+def from_file(name):
+    """Parses the JSON text stored in the file named 'name'.
+
+    The file is opened in mode "r" and is closed again even if parsing
+    raises an exception.  Returns whatever from_stream() returns."""
+    source = open(name, "r")
+    try:
+        result = from_stream(source)
+    finally:
+        source.close()
+    return result
+
+def from_string(s):
+    """Parses the JSON text in string 's'.
+
+    's' may be a unicode string, which is parsed as is, or a byte string,
+    which is first decoded as UTF-8.  The text is fed to a Parser created
+    with check_trailer=True.
+
+    Returns whatever Parser.finish() returns.  Raises Error if 's' is a byte
+    string that is not valid UTF-8."""
+    # unicode() refuses to decode something that is already unicode, so only
+    # byte strings go through the decoding step.
+    if not isinstance(s, unicode):
+        try:
+            s = unicode(s, 'utf-8')
+        except UnicodeDecodeError, e:
+            bad = e.object[e.start:e.end]
+            # Zero-pad so that every offending byte prints as two hex digits.
+            seq = ' '.join(["0x%02x" % ord(c) for c in bad])
+            # NOTE(review): 'Error' is not defined or imported in the visible
+            # part of this file -- confirm where it is supposed to come from
+            # and that it accepts a 'tag' keyword.
+            raise Error("\"%s\" is not a valid UTF-8 string: "
+                        "invalid UTF-8 sequence %s" % (s, seq),
+                        tag="constraint violation")
+    p = Parser(check_trailer=True)
+    p.feed(s)
+    return p.finish()
+
+class Parser(object):
+    """Incremental JSON parser.
+
+    Input is supplied a piece at a time with feed() and the result is
+    retrieved with finish().  The lexer and the parser are both state
+    machines: 'lex_state' and 'parse_state' each hold the Parser method that
+    handles the next character or token, and that method is called with
+    'self' passed explicitly.
+
+    The top-level JSON value must be an object or an array.  finish()
+    returns the parsed value on success or an error message string on
+    failure."""
+
+    ## Maximum height of parsing stack. ##
+    MAX_HEIGHT = 1000
+
+    def __init__(self, check_trailer=False):
+        """Initializes the parser.
+
+        If 'check_trailer' is false, the parser reports itself done as soon
+        as the top-level value is complete.  If it is true, the parser keeps
+        accepting input after that point, so that any token following the
+        value is reported as an error."""
+        self.check_trailer = check_trailer
+
+        # Lexical analysis.
+        self.lex_state = Parser.__lex_start
+        self.buffer = ""
+        self.line_number = 0
+        self.column_number = 0
+        self.byte_number = 0
+
+        # Parsing.
+        self.parse_state = Parser.__parse_start
+        self.stack = []
+        self.member_name = None
+
+        # Parse status.
+        self.done = False
+        self.error = None
+
+    # Actions for the first character of a token, selected through the
+    # '__lex_start_actions' table below.
+    def __lex_start_space(self, c):
+        pass
+
+    def __lex_start_alpha(self, c):
+        self.buffer = c
+        self.lex_state = Parser.__lex_keyword
+
+    def __lex_start_token(self, c):
+        self.__parser_input(c)
+
+    def __lex_start_number(self, c):
+        self.buffer = c
+        self.lex_state = Parser.__lex_number
+
+    def __lex_start_string(self, c):
+        self.lex_state = Parser.__lex_string
+
+    def __lex_start_error(self, c):
+        if ord(c) >= 32 and ord(c) < 128:
+            self.__error("invalid character '%s'" % c)
+        else:
+            self.__error("invalid character U+%04x" % ord(c))
+
+    __lex_start_actions = {}
+    for c in " \t\n\r":
+        __lex_start_actions[c] = __lex_start_space
+    for c in "abcdefghijklmnopqrstuvwxyz":
+        __lex_start_actions[c] = __lex_start_alpha
+    for c in "[{]}:,":
+        __lex_start_actions[c] = __lex_start_token
+    for c in "-0123456789":
+        __lex_start_actions[c] = __lex_start_number
+    __lex_start_actions['"'] = __lex_start_string
+
+    def __lex_start(self, c):
+        """Lexer state between tokens: dispatches on 'c' and always consumes
+        it."""
+        Parser.__lex_start_actions.get(
+            c, Parser.__lex_start_error)(self, c)
+        return True
+
+    __lex_alpha = {}
+    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+        __lex_alpha[c] = True
+
+    def __lex_finish_keyword(self):
+        """Passes the keyword accumulated in 'buffer' to the parser, or
+        records an error if it is not false, true, or null."""
+        if self.buffer == "false":
+            self.__parser_input(False)
+        elif self.buffer == "true":
+            self.__parser_input(True)
+        elif self.buffer == "null":
+            self.__parser_input(None)
+        else:
+            self.__error("invalid keyword '%s'" % self.buffer)
+
+    def __lex_keyword(self, c):
+        """Lexer state inside a keyword.  A letter extends the keyword; any
+        other character ends it and is left unconsumed."""
+        if c in Parser.__lex_alpha:
+            self.buffer += c
+            return True
+        else:
+            self.__lex_finish_keyword()
+            return False
+
+    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
+                             r"(?:\.([0-9]+))?"
+                             r"(?:[eE]([-+]?[0-9]+))?$")
+
+    def __lex_finish_number(self):
+        """Converts the number accumulated in 'buffer' and passes it to the
+        parser.
+
+        A number whose value is an integer is passed as an integer when it
+        is below 2**63 (or, for a negative number, when its magnitude is at
+        most 2**63), even if it was written with a fraction or an exponent.
+        Anything else is passed as a float.  A malformed or out-of-range
+        number is recorded as an error instead."""
+        s = self.buffer
+        m = Parser.__number_re.match(s)
+        if m:
+            sign, integer, fraction, exp = m.groups()
+            if (exp is not None and
+                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
+                self.__error("exponent outside valid range")
+                return
+
+            # A fraction consisting only of zeros contributes nothing.
+            if fraction is not None and len(fraction.lstrip('0')) == 0:
+                fraction = None
+
+            sig_string = integer
+            if fraction is not None:
+                sig_string += fraction
+            significand = int(sig_string)
+
+            # The value is significand * 10**pow10.
+            pow10 = 0
+            if fraction is not None:
+                pow10 -= len(fraction)
+            if exp is not None:
+                pow10 += long(exp)
+
+            if significand == 0:
+                self.__parser_input(0)
+                return
+            elif significand <= 2**63:
+                # Fold the power of ten into the significand as far as
+                # possible.  Scaling up stops once the value has left the
+                # integer range, so a huge exponent cannot make this loop
+                # run for long.
+                while pow10 > 0 and significand <= 2**63:
+                    significand *= 10
+                    pow10 -= 1
+                while pow10 < 0 and significand % 10 == 0:
+                    significand //= 10
+                    pow10 += 1
+                if (pow10 == 0 and
+                    ((not sign and significand < 2**63) or
+                     (sign and significand <= 2**63))):
+                    if sign:
+                        self.__parser_input(-significand)
+                    else:
+                        self.__parser_input(significand)
+                    return
+
+            value = float(s)
+            if value == float("inf") or value == float("-inf"):
+                self.__error("number outside valid range")
+                return
+            if value == 0:
+                # Suppress negative zero.
+                value = 0
+            self.__parser_input(value)
+        elif re.match("-?0[0-9]", s):
+            self.__error("leading zeros not allowed")
+        elif re.match("-([^0-9]|$)", s):
+            self.__error("'-' must be followed by digit")
+        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
+            self.__error("decimal point must be followed by digit")
+        elif re.search("e[-+]?([^0-9]|$)", s):
+            self.__error("exponent must contain at least one digit")
+        else:
+            self.__error("syntax error in number")
+
+    def __lex_number(self, c):
+        """Lexer state inside a number.  A character that can appear in a
+        number extends it; any other character ends it and is left
+        unconsumed."""
+        if c in ".0123456789eE-+":
+            self.buffer += c
+            return True
+        else:
+            self.__lex_finish_number()
+            return False
+
+    __4hex_re = re.compile("[0-9a-fA-F]{4}")
+
+    def __lex_4hex(self, s):
+        """Returns the value of the four hexadecimal digits in 's'.  If 's'
+        is too short, is malformed, or is "0000", records an error and
+        returns None."""
+        if len(s) < 4:
+            self.__error("quoted string ends within \\u escape")
+        elif not Parser.__4hex_re.match(s):
+            self.__error("malformed \\u escape")
+        elif s == "0000":
+            self.__error("null bytes not supported in quoted strings")
+        else:
+            return int(s, 16)
+
+    @staticmethod
+    def __is_leading_surrogate(c):
+        """Returns true if 'c' is a Unicode code point for a leading
+        surrogate."""
+        return c >= 0xd800 and c <= 0xdbff
+
+    @staticmethod
+    def __is_trailing_surrogate(c):
+        """Returns true if 'c' is a Unicode code point for a trailing
+        surrogate."""
+        return c >= 0xdc00 and c <= 0xdfff
+
+    @staticmethod
+    def __utf16_decode_surrogate_pair(leading, trailing):
+        """Returns the unicode code point corresponding to leading surrogate
+        'leading' and trailing surrogate 'trailing'.  The return value will
+        not make any sense if 'leading' or 'trailing' are not in the correct
+        ranges for leading or trailing surrogates."""
+        #  Leading surrogate:         110110wwwwxxxxxx
+        # Trailing surrogate:         110111xxxxxxxxxx
+        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
+        w = (leading >> 6) & 0xf
+        u = w + 1
+        x0 = leading & 0x3f
+        x1 = trailing & 0x3ff
+        return (u << 16) | (x0 << 10) | x1
+
+    __unescape = {'"': u'"',
+                  "\\": u"\\",
+                  "/": u"/",
+                  "b": u"\b",
+                  "f": u"\f",
+                  "n": u"\n",
+                  "r": u"\r",
+                  "t": u"\t"}
+
+    def __lex_finish_string(self):
+        """Expands the backslash escapes in the string body accumulated in
+        'buffer' and passes the result to the parser as a 'string' token.
+        Records an error instead if an escape is malformed."""
+        inp = self.buffer
+        out = u""
+        while len(inp):
+            backslash = inp.find('\\')
+            if backslash == -1:
+                out += inp
+                break
+            out += inp[:backslash]
+            inp = inp[backslash + 1:]
+            if inp == "":
+                self.__error("quoted string may not end with backslash")
+                return
+
+            replacement = Parser.__unescape.get(inp[0])
+            if replacement is not None:
+                out += replacement
+                inp = inp[1:]
+                continue
+            elif inp[0] != u'u':
+                self.__error("bad escape \\%s" % inp[0])
+                return
+
+            c0 = self.__lex_4hex(inp[1:5])
+            if c0 is None:
+                return
+            inp = inp[5:]
+
+            if Parser.__is_leading_surrogate(c0):
+                # A leading surrogate must be followed immediately by a
+                # second \u escape that holds the trailing surrogate.
+                if inp[:2] != u'\\u':
+                    self.__error("malformed escaped surrogate pair")
+                    return
+                c1 = self.__lex_4hex(inp[2:6])
+                if c1 is None:
+                    return
+                if not Parser.__is_trailing_surrogate(c1):
+                    self.__error("second half of escaped surrogate pair is "
+                                 "not trailing surrogate")
+                    return
+                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
+                inp = inp[6:]
+            else:
+                code_point = c0
+            out += unichr(code_point)
+        self.__parser_input('string', out)
+
+    def __lex_string_escape(self, c):
+        """Lexer state just after a backslash in a string: keeps 'c'
+        whatever it is, so that an escaped quote does not end the string."""
+        self.buffer += c
+        self.lex_state = Parser.__lex_string
+        return True
+
+    def __lex_string(self, c):
+        """Lexer state inside a quoted string.  Always consumes 'c'."""
+        if c == '\\':
+            self.buffer += c
+            self.lex_state = Parser.__lex_string_escape
+        elif c == '"':
+            self.__lex_finish_string()
+        elif ord(c) >= 0x20:
+            self.buffer += c
+        else:
+            self.__error("U+%04X must be escaped in quoted string" % ord(c))
+        return True
+
+    def __lex_input(self, c):
+        """Updates the position counters for 'c' and hands it to the current
+        lexer state.  Returns True if 'c' was consumed, False if it must be
+        presented again."""
+        self.byte_number += 1
+        if c == '\n':
+            self.column_number = 0
+            self.line_number += 1
+        else:
+            self.column_number += 1
+
+        eat = self.lex_state(self, c)
+        assert eat is True or eat is False
+        return eat
+
+    # Parser states.  Each receives a token and, for a 'string' token, the
+    # string's value in 'string'.
+    def __parse_start(self, token, string):
+        if token == '{':
+            self.__push_object()
+        elif token == '[':
+            self.__push_array()
+        else:
+            self.__error("syntax error at beginning of input")
+
+    def __parse_end(self, token, string):
+        self.__error("trailing garbage at end of input")
+
+    def __parse_object_init(self, token, string):
+        if token == '}':
+            self.__parser_pop()
+        else:
+            self.__parse_object_name(token, string)
+
+    def __parse_object_name(self, token, string):
+        if token == 'string':
+            self.member_name = string
+            self.parse_state = Parser.__parse_object_colon
+        else:
+            self.__error("syntax error parsing object expecting string")
+
+    def __parse_object_colon(self, token, string):
+        if token == ":":
+            self.parse_state = Parser.__parse_object_value
+        else:
+            self.__error("syntax error parsing object expecting ':'")
+
+    def __parse_object_value(self, token, string):
+        self.__parse_value(token, string, Parser.__parse_object_next)
+
+    def __parse_object_next(self, token, string):
+        if token == ",":
+            self.parse_state = Parser.__parse_object_name
+        elif token == "}":
+            self.__parser_pop()
+        else:
+            self.__error("syntax error expecting '}' or ','")
+
+    def __parse_array_init(self, token, string):
+        if token == ']':
+            self.__parser_pop()
+        else:
+            self.__parse_array_value(token, string)
+
+    def __parse_array_value(self, token, string):
+        self.__parse_value(token, string, Parser.__parse_array_next)
+
+    def __parse_array_next(self, token, string):
+        if token == ",":
+            self.parse_state = Parser.__parse_array_value
+        elif token == "]":
+            self.__parser_pop()
+        else:
+            self.__error("syntax error expecting ']' or ','")
+
+    def __parser_input(self, token, string=None):
+        """Resets the lexer for the next token and hands 'token' to the
+        current parser state."""
+        self.lex_state = Parser.__lex_start
+        self.buffer = ""
+        self.parse_state(self, token, string)
+
+    def __put_value(self, value):
+        """Stores 'value' in the container on top of the stack: under
+        'member_name' for an object, appended for an array."""
+        top = self.stack[-1]
+        if type(top) == dict:
+            top[self.member_name] = value
+        else:
+            top.append(value)
+
+    def __parser_push(self, new_json, next_state):
+        """Starts the new container 'new_json', unless that would exceed
+        MAX_HEIGHT, and moves the parser to 'next_state'."""
+        if len(self.stack) < Parser.MAX_HEIGHT:
+            if len(self.stack) > 0:
+                self.__put_value(new_json)
+            self.stack.append(new_json)
+            self.parse_state = next_state
+        else:
+            self.__error("input exceeds maximum nesting depth %d" %
+                         Parser.MAX_HEIGHT)
+
+    def __push_object(self):
+        self.__parser_push({}, Parser.__parse_object_init)
+
+    def __push_array(self):
+        self.__parser_push([], Parser.__parse_array_init)
+
+    def __parser_pop(self):
+        """Finishes the container on top of the stack.  The outermost
+        container stays on the stack so that finish() can return it."""
+        if len(self.stack) == 1:
+            self.parse_state = Parser.__parse_end
+            if not self.check_trailer:
+                self.done = True
+        else:
+            self.stack.pop()
+            top = self.stack[-1]
+            if type(top) == list:
+                self.parse_state = Parser.__parse_array_next
+            else:
+                self.parse_state = Parser.__parse_object_next
+
+    def __parse_value(self, token, string, next_state):
+        """Handles a token in a position where a value is expected.  A
+        scalar is stored and the parser moves to 'next_state'; '{' or '['
+        starts a nested container instead."""
+        if token in [False, None, True] or type(token) in [int, long, float]:
+            self.__put_value(token)
+        elif token == 'string':
+            self.__put_value(string)
+        else:
+            if token == '{':
+                self.__push_object()
+            elif token == '[':
+                self.__push_array()
+            else:
+                self.__error("syntax error expecting value")
+            return
+        self.parse_state = next_state
+
+    def __error(self, message):
+        """Records 'message', prefixed with the current input position, as
+        the parse error and stops the parser.  Only the first error is
+        kept."""
+        if self.error is None:
+            self.error = ("line %d, column %d, byte %d: %s"
+                          % (self.line_number, self.column_number,
+                             self.byte_number, message))
+            self.done = True
+
+    def feed(self, s):
+        """Feeds the characters of 's' to the parser.  Returns the number of
+        characters consumed, which is less than len(s) only if the parser
+        became done part way through."""
+        i = 0
+        while True:
+            if self.done or i >= len(s):
+                return i
+            # A character that the lexer declines to consume is presented
+            # again in the lexer's new state.
+            if self.__lex_input(s[i]):
+                i += 1
+
+    def is_done(self):
+        """Returns true if the parser will not accept any more input."""
+        return self.done
+
+    def finish(self):
+        """Signals the end of input.  Returns the parsed value if the input
+        was a complete, valid JSON object or array, otherwise an error
+        message string."""
+        if self.lex_state == Parser.__lex_start:
+            pass
+        elif self.lex_state in (Parser.__lex_string,
+                                Parser.__lex_string_escape):
+            self.__error("unexpected end of input in quoted string")
+        else:
+            # A keyword or number is still pending; a space terminates it.
+            self.__lex_input(" ")
+
+        if self.parse_state == Parser.__parse_start:
+            self.__error("empty input stream")
+        elif self.parse_state != Parser.__parse_end:
+            self.__error("unexpected end of input")
+
+        if self.error is None:
+            assert len(self.stack) == 1
+            return self.stack.pop()
+        else:
+            return self.error