f8b02d18004cbb640e303f8997269abf7704f9c6
[sliver-openvswitch.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011 Nicira Networks
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
19 escapes = {ord('"'): u"\\\"",
20            ord("\\"): u"\\\\",
21            ord("\b"): u"\\b",
22            ord("\f"): u"\\f",
23            ord("\n"): u"\\n",
24            ord("\r"): u"\\r",
25            ord("\t"): u"\\t"}
26 for i in range(32):
27     if i not in escapes:
28         escapes[i] = u"\\u%04x" % i
29
30 def __dump_string(stream, s):
31     stream.write(u"\"")
32     for c in s:
33         x = ord(c)
34         escape = escapes.get(x)
35         if escape:
36             stream.write(escape)
37         else:
38             stream.write(c)
39     stream.write(u"\"")
40
41 def to_stream(obj, stream, pretty=False, sort_keys=True):
42     if obj is None:
43         stream.write(u"null")
44     elif obj is False:
45         stream.write(u"false")
46     elif obj is True:
47         stream.write(u"true")
48     elif type(obj) in (int, long):
49         stream.write(u"%d" % obj)
50     elif type(obj) == float:
51         stream.write("%.15g" % obj)
52     elif type(obj) == unicode:
53         __dump_string(stream, obj)
54     elif type(obj) == str:
55         __dump_string(stream, unicode(obj))
56     elif type(obj) == dict:
57         stream.write(u"{")
58         if sort_keys:
59             items = sorted(obj.items())
60         else:
61             items = obj.iteritems()
62         i = 0
63         for key, value in items:
64             if i > 0:
65                 stream.write(u",")
66             i += 1
67             __dump_string(stream, unicode(key))
68             stream.write(u":")
69             to_stream(value, stream, pretty, sort_keys)
70         stream.write(u"}")
71     elif type(obj) in (list, tuple):
72         stream.write(u"[")
73         i = 0
74         for value in obj:
75             if i > 0:
76                 stream.write(u",")
77             i += 1
78             to_stream(value, stream, pretty, sort_keys)
79         stream.write(u"]")
80     else:
81         raise Error("can't serialize %s as JSON" % obj)
82
83 def to_file(obj, name, pretty=False, sort_keys=True):
84     stream = open(name, "w")
85     try:
86         to_stream(obj, stream, pretty, sort_keys)
87     finally:
88         stream.close()
89
90 def to_string(obj, pretty=False, sort_keys=True):
91     output = StringIO.StringIO()
92     to_stream(obj, output, pretty, sort_keys)
93     s = output.getvalue()
94     output.close()
95     return s
96
97 def from_stream(stream):
98     p = Parser(check_trailer=True)
99     while True:
100         buf = stream.read(4096)
101         if buf == "" or p.feed(buf) != len(buf):
102             break
103     return p.finish()
104
105 def from_file(name):
106     stream = open(name, "r")
107     try:
108         return from_stream(stream)
109     finally:
110         stream.close()
111
112 def from_string(s):
113     try:
114         s = unicode(s, 'utf-8')
115     except UnicodeDecodeError, e:
116         seq = ' '.join(["0x%2x" % ord(c) for c in e.object[e.start:e.end]])
117         return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
118     p = Parser(check_trailer=True)
119     p.feed(s)
120     return p.finish()
121
122 class Parser(object):
123     ## Maximum height of parsing stack. ##
124     MAX_HEIGHT = 1000
125
126     def __init__(self, check_trailer=False):
127         self.check_trailer = check_trailer
128
129         # Lexical analysis.
130         self.lex_state = Parser.__lex_start
131         self.buffer = ""
132         self.line_number = 0
133         self.column_number = 0
134         self.byte_number = 0
135         
136         # Parsing.
137         self.parse_state = Parser.__parse_start
138         self.stack = []
139         self.member_name = None
140
141         # Parse status.
142         self.done = False
143         self.error = None
144
145     def __lex_start_space(self, c):
146         pass
147     def __lex_start_alpha(self, c):
148         self.buffer = c
149         self.lex_state = Parser.__lex_keyword
150     def __lex_start_token(self, c):
151         self.__parser_input(c)
152     def __lex_start_number(self, c):
153         self.buffer = c
154         self.lex_state = Parser.__lex_number
155     def __lex_start_string(self, c):
156         self.lex_state = Parser.__lex_string
157     def __lex_start_error(self, c):
158         if ord(c) >= 32 and ord(c) < 128:
159             self.__error("invalid character '%s'" % c)
160         else:
161             self.__error("invalid character U+%04x" % ord(c))
162
163     __lex_start_actions = {}
164     for c in " \t\n\r":
165         __lex_start_actions[c] = __lex_start_space
166     for c in "abcdefghijklmnopqrstuvwxyz":
167         __lex_start_actions[c] = __lex_start_alpha
168     for c in "[{]}:,":
169         __lex_start_actions[c] = __lex_start_token
170     for c in "-0123456789":
171         __lex_start_actions[c] = __lex_start_number
172     __lex_start_actions['"'] = __lex_start_string
173     def __lex_start(self, c):
174         Parser.__lex_start_actions.get(
175             c, Parser.__lex_start_error)(self, c)
176         return True
177
178     __lex_alpha = {}
179     for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
180         __lex_alpha[c] = True
181     def __lex_finish_keyword(self):
182         if self.buffer == "false":
183             self.__parser_input(False)
184         elif self.buffer == "true":
185             self.__parser_input(True)
186         elif self.buffer == "null":
187             self.__parser_input(None)
188         else:
189             self.__error("invalid keyword '%s'" % self.buffer)
190     def __lex_keyword(self, c):
191         if c in Parser.__lex_alpha:
192             self.buffer += c
193             return True
194         else:
195             self.__lex_finish_keyword()
196             return False
197
198     __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
199     def __lex_finish_number(self):
200         s = self.buffer
201         m = Parser.__number_re.match(s)
202         if m:
203             sign, integer, fraction, exp = m.groups() 
204             if (exp is not None and
205                 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
206                 self.__error("exponent outside valid range")
207                 return
208
209             if fraction is not None and len(fraction.lstrip('0')) == 0:
210                 fraction = None
211
212             sig_string = integer
213             if fraction is not None:
214                 sig_string += fraction
215             significand = int(sig_string)
216
217             pow10 = 0
218             if fraction is not None:
219                 pow10 -= len(fraction)
220             if exp is not None:
221                 pow10 += long(exp)
222
223             if significand == 0:
224                 self.__parser_input(0)
225                 return
226             elif significand <= 2**63:
227                 while pow10 > 0 and significand <= 2*63:
228                     significand *= 10
229                     pow10 -= 1
230                 while pow10 < 0 and significand % 10 == 0:
231                     significand /= 10
232                     pow10 += 1
233                 if (pow10 == 0 and
234                     ((not sign and significand < 2**63) or
235                      (sign and significand <= 2**63))):
236                     if sign:
237                         self.__parser_input(-significand)
238                     else:
239                         self.__parser_input(significand)
240                     return
241
242             value = float(s)
243             if value == float("inf") or value == float("-inf"):
244                 self.__error("number outside valid range")
245                 return
246             if value == 0:
247                 # Suppress negative zero.
248                 value = 0
249             self.__parser_input(value)
250         elif re.match("-?0[0-9]", s):
251             self.__error("leading zeros not allowed")
252         elif re.match("-([^0-9]|$)", s):
253             self.__error("'-' must be followed by digit")
254         elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
255             self.__error("decimal point must be followed by digit")
256         elif re.search("e[-+]?([^0-9]|$)", s):
257             self.__error("exponent must contain at least one digit")
258         else:
259             self.__error("syntax error in number")
260             
261     def __lex_number(self, c):
262         if c in ".0123456789eE-+":
263             self.buffer += c
264             return True
265         else:
266             self.__lex_finish_number()
267             return False
268
269     __4hex_re = re.compile("[0-9a-fA-F]{4}")
270     def __lex_4hex(self, s):
271         if len(s) < 4:
272             self.__error("quoted string ends within \\u escape")
273         elif not Parser.__4hex_re.match(s):
274             self.__error("malformed \\u escape")
275         elif s == "0000":
276             self.__error("null bytes not supported in quoted strings")
277         else:
278             return int(s, 16)
279     @staticmethod
280     def __is_leading_surrogate(c):
281         """Returns true if 'c' is a Unicode code point for a leading
282         surrogate."""
283         return c >= 0xd800 and c <= 0xdbff
284     @staticmethod
285     def __is_trailing_surrogate(c):
286         """Returns true if 'c' is a Unicode code point for a trailing
287         surrogate."""
288         return c >= 0xdc00 and c <= 0xdfff
289     @staticmethod
290     def __utf16_decode_surrogate_pair(leading, trailing):
291         """Returns the unicode code point corresponding to leading surrogate
292         'leading' and trailing surrogate 'trailing'.  The return value will not
293         make any sense if 'leading' or 'trailing' are not in the correct ranges
294         for leading or trailing surrogates."""
295         #  Leading surrogate:         110110wwwwxxxxxx
296         # Trailing surrogate:         110111xxxxxxxxxx
297         #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
298         w = (leading >> 6) & 0xf
299         u = w + 1
300         x0 = leading & 0x3f
301         x1 = trailing & 0x3ff
302         return (u << 16) | (x0 << 10) | x1
303     __unescape = {'"': u'"',
304                   "\\": u"\\",
305                   "/": u"/",
306                   "b": u"\b",
307                   "f": u"\f",
308                   "n": u"\n",
309                   "r": u"\r",
310                   "t": u"\t"}
311     def __lex_finish_string(self):
312         inp = self.buffer
313         out = u""
314         while len(inp):
315             backslash = inp.find('\\')
316             if backslash == -1:
317                 out += inp
318                 break
319             out += inp[:backslash]
320             inp = inp[backslash + 1:]
321             if inp == "":
322                 self.__error("quoted string may not end with backslash")
323                 return
324
325             replacement = Parser.__unescape.get(inp[0])
326             if replacement is not None:
327                 out += replacement
328                 inp = inp[1:]
329                 continue
330             elif inp[0] != u'u':
331                 self.__error("bad escape \\%s" % inp[0])
332                 return
333             
334             c0 = self.__lex_4hex(inp[1:5])
335             if c0 is None:
336                 return
337             inp = inp[5:]
338
339             if Parser.__is_leading_surrogate(c0):
340                 if inp[:2] != u'\\u':
341                     self.__error("malformed escaped surrogate pair")
342                     return
343                 c1 = self.__lex_4hex(inp[2:6])
344                 if c1 is None:
345                     return
346                 if not Parser.__is_trailing_surrogate(c1):
347                     self.__error("second half of escaped surrogate pair is "
348                                  "not trailing surrogate")
349                     return
350                 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
351                 inp = inp[6:]
352             else:
353                 code_point = c0
354             out += unichr(code_point)
355         self.__parser_input('string', out)
356
357     def __lex_string_escape(self, c):
358         self.buffer += c
359         self.lex_state = Parser.__lex_string
360         return True
361     def __lex_string(self, c):
362         if c == '\\':
363             self.buffer += c
364             self.lex_state = Parser.__lex_string_escape
365         elif c == '"':
366             self.__lex_finish_string()
367         elif ord(c) >= 0x20:
368             self.buffer += c
369         else:
370             self.__error("U+%04X must be escaped in quoted string" % ord(c))
371         return True
372
373     def __lex_input(self, c):
374         self.byte_number += 1
375         if c == '\n':
376             self.column_number = 0
377             self.line_number += 1
378         else:
379             self.column_number += 1
380
381         eat = self.lex_state(self, c)
382         assert eat is True or eat is False
383         return eat
384
385     def __parse_start(self, token, string):
386         if token == '{':
387             self.__push_object()
388         elif token == '[':
389             self.__push_array()
390         else:
391             self.__error("syntax error at beginning of input")
392     def __parse_end(self, token, string):
393         self.__error("trailing garbage at end of input")
394     def __parse_object_init(self, token, string):
395         if token == '}':
396             self.__parser_pop()
397         else:
398             self.__parse_object_name(token, string)
399     def __parse_object_name(self, token, string):
400         if token == 'string':
401             self.member_name = string
402             self.parse_state = Parser.__parse_object_colon
403         else:
404             self.__error("syntax error parsing object expecting string")
405     def __parse_object_colon(self, token, string):
406         if token == ":":
407             self.parse_state = Parser.__parse_object_value
408         else:
409             self.__error("syntax error parsing object expecting ':'")
410     def __parse_object_value(self, token, string):
411         self.__parse_value(token, string, Parser.__parse_object_next)
412     def __parse_object_next(self, token, string):
413         if token == ",":
414             self.parse_state = Parser.__parse_object_name
415         elif token == "}":
416             self.__parser_pop()
417         else:
418             self.__error("syntax error expecting '}' or ','")
419     def __parse_array_init(self, token, string):
420         if token == ']':
421             self.__parser_pop()
422         else:
423             self.__parse_array_value(token, string)
424     def __parse_array_value(self, token, string):
425         self.__parse_value(token, string, Parser.__parse_array_next)
426     def __parse_array_next(self, token, string):
427         if token == ",":
428             self.parse_state = Parser.__parse_array_value
429         elif token == "]":
430             self.__parser_pop()
431         else:
432             self.__error("syntax error expecting ']' or ','")
433     def __parser_input(self, token, string=None):
434         self.lex_state = Parser.__lex_start
435         self.buffer = ""
436         #old_state = self.parse_state
437         self.parse_state(self, token, string)
438         #print ("token=%s string=%s old_state=%s new_state=%s"
439         #       % (token, string, old_state, self.parse_state))
440
441     def __put_value(self, value):
442         top = self.stack[-1]
443         if type(top) == dict:
444             top[self.member_name] = value
445         else:
446             top.append(value)
447
448     def __parser_push(self, new_json, next_state):
449         if len(self.stack) < Parser.MAX_HEIGHT:
450             if len(self.stack) > 0:
451                 self.__put_value(new_json)
452             self.stack.append(new_json)
453             self.parse_state = next_state
454         else:
455             self.__error("input exceeds maximum nesting depth %d" %
456                          Parser.MAX_HEIGHT)
457     def __push_object(self):
458         self.__parser_push({}, Parser.__parse_object_init)
459     def __push_array(self):
460         self.__parser_push([], Parser.__parse_array_init)
461
462     def __parser_pop(self):
463         if len(self.stack) == 1:
464             self.parse_state = Parser.__parse_end
465             if not self.check_trailer:
466                 self.done = True
467         else:
468             self.stack.pop()
469             top = self.stack[-1]
470             if type(top) == list:
471                 self.parse_state = Parser.__parse_array_next
472             else:
473                 self.parse_state = Parser.__parse_object_next
474
475     def __parse_value(self, token, string, next_state):
476         if token in [False, None, True] or type(token) in [int, long, float]:
477             self.__put_value(token)
478         elif token == 'string':
479             self.__put_value(string)
480         else:
481             if token == '{':
482                 self.__push_object()
483             elif token == '[':
484                 self.__push_array()
485             else:
486                 self.__error("syntax error expecting value")
487             return
488         self.parse_state = next_state
489
490     def __error(self, message):
491         if self.error is None:
492             self.error = ("line %d, column %d, byte %d: %s"
493                           % (self.line_number, self.column_number,
494                              self.byte_number, message))
495             self.done = True
496
497     def feed(self, s):
498         i = 0
499         while True:
500             if self.done or i >= len(s):
501                 return i
502             if self.__lex_input(s[i]):
503                 i += 1
504
505     def is_done(self):
506         return self.done
507
508     def finish(self):
509         if self.lex_state == Parser.__lex_start:
510             pass
511         elif self.lex_state in (Parser.__lex_string,
512                                 Parser.__lex_string_escape):
513             self.__error("unexpected end of input in quoted string")
514         else:
515             self.__lex_input(" ")
516
517         if self.parse_state == Parser.__parse_start:
518             self.__error("empty input stream")
519         elif self.parse_state != Parser.__parse_end:
520             self.__error("unexpected end of input")
521
522         if self.error == None:
523             assert len(self.stack) == 1
524             return self.stack.pop()
525         else:
526             return self.error