Implement initial Python bindings for Open vSwitch database.
[sliver-openvswitch.git] / python / ovs / json.py
1 # Copyright (c) 2010 Nicira Networks
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
# Maps a character's code point to the escape sequence emitted for it inside
# a JSON string.  Every control character below U+0020 starts out with the
# generic \uXXXX form; the characters that have a short escape, plus the
# double quote and the backslash, then take their short form.
escapes = {}
for i in range(32):
    escapes[i] = u"\\u%04x" % i
escapes.update({
    ord('"'): u"\\\"",
    ord("\\"): u"\\\\",
    ord("\b"): u"\\b",
    ord("\f"): u"\\f",
    ord("\n"): u"\\n",
    ord("\r"): u"\\r",
    ord("\t"): u"\\t",
})
29
def __dump_string(stream, s):
    """Writes 's' to 'stream' as a double-quoted JSON string.

    Each character whose code point has an entry in 'escapes' is replaced by
    that entry; every other character is written unchanged."""
    stream.write(u"\"")
    for ch in s:
        # Look the character up by code point; fall back to the character
        # itself when the table has no (non-empty) escape for it.
        stream.write(escapes.get(ord(ch)) or ch)
    stream.write(u"\"")
40
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Writes 'obj' to 'stream' as JSON text.

    'obj' may be None, True, False, an int or long, a float, a str or
    unicode string, a dict, or a list or tuple.  Containers are serialized
    recursively.  Dispatch is on the exact type of 'obj', so instances of
    subclasses of these types are rejected.  Dict keys are converted with
    unicode() before being written as JSON strings.

    Everything passed to stream.write() is a unicode string, so 'stream'
    may be a unicode-only stream.

    If 'sort_keys' is true, dict members are written in the order produced
    by sorting the dict's items; otherwise they are written in the dict's
    own iteration order.  'pretty' is accepted and handed down to recursive
    calls but does not affect the output.

    Raises Error if 'obj', or a value nested inside it, has any other
    type."""
    if obj is None:
        stream.write(u"null")
    elif obj is False:
        stream.write(u"false")
    elif obj is True:
        stream.write(u"true")
    elif type(obj) == float:
        # Written as unicode like every other branch; a byte string here
        # would be rejected by unicode-only streams.
        stream.write(u"%.15g" % obj)
    elif type(obj) in (int, long):
        stream.write(u"%d" % obj)
    elif type(obj) == unicode:
        __dump_string(stream, obj)
    elif type(obj) == str:
        __dump_string(stream, unicode(obj))
    elif type(obj) == dict:
        stream.write(u"{")
        if sort_keys:
            items = sorted(obj.items())
        else:
            items = obj.iteritems()
        first = True
        for key, value in items:
            if not first:
                stream.write(u",")
            first = False
            __dump_string(stream, unicode(key))
            stream.write(u":")
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"}")
    elif type(obj) in (list, tuple):
        stream.write(u"[")
        first = True
        for value in obj:
            if not first:
                stream.write(u",")
            first = False
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"]")
    else:
        # NOTE(review): no definition or import of 'Error' is visible in
        # this file -- confirm where it is expected to come from.
        raise Error("can't serialize %s as JSON" % obj)
82
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name'.

    The file is opened in "w" mode and is closed again whether or not
    serialization succeeds.  'pretty' and 'sort_keys' are passed through to
    to_stream()."""
    out = open(name, "w")
    try:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
    finally:
        out.close()
89
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as JSON text.

    The text is collected in an in-memory StringIO buffer by to_stream(),
    to which 'pretty' and 'sort_keys' are passed through."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty=pretty, sort_keys=sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
96
def from_stream(stream):
    """Parses JSON read from 'stream' and returns Parser.finish()'s result.

    Input is read in chunks of up to 4096 and fed to a Parser created with
    check_trailer=True.  Reading stops at end of input (an empty read) or as
    soon as the parser declines to consume a whole chunk."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "":
        if parser.feed(chunk) != len(chunk):
            # The parser stopped early, so further input is pointless.
            break
        chunk = stream.read(4096)
    return parser.finish()
104
def from_file(name):
    """Parses the JSON contained in the file named 'name'.

    The file is opened in "r" mode, handed to from_stream(), and closed
    again whether or not parsing raises.  Returns whatever from_stream()
    returns."""
    source = open(name, "r")
    try:
        result = from_stream(source)
    finally:
        source.close()
    return result
111
def from_string(s):
    """Parses the JSON text in 's' and returns Parser.finish()'s result.

    's' may be a byte string, which is decoded as UTF-8 first, or a unicode
    string, which is parsed as is.  The text is fed to a Parser created with
    check_trailer=True.

    Raises Error, with tag "constraint violation", if a byte string 's' is
    not valid UTF-8; the message lists the offending bytes in hex."""
    if not isinstance(s, unicode):
        # unicode(x, 'utf-8') raises TypeError when x is already unicode,
        # so only byte strings are decoded.
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError:
            # Fetched via sys.exc_info() so that the handler needs neither
            # the "except X, e" nor the "except X as e" spelling.
            e = sys.exc_info()[1]
            seq = ' '.join(["0x%02x" % ord(c)
                            for c in e.object[e.start:e.end]])
            # NOTE(review): no definition or import of 'Error' is visible
            # in this file -- confirm where it is expected to come from.
            raise Error("\"%s\" is not a valid UTF-8 string: "
                        "invalid UTF-8 sequence %s" % (s, seq),
                        tag="constraint violation")
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
123
class Parser(object):
    """Incremental JSON parser that consumes its input one character at a
    time.

    Text is supplied through feed().  finish() then returns either the
    parsed top-level value or, if anything went wrong, an error message
    string of the form "line L, column C, byte B: message".  The top-level
    value must be an object or an array.

    If 'check_trailer' is false, the parser considers itself done as soon as
    the top-level value has been closed.  If it is true, the parser keeps
    consuming input after the top-level value: white space is skipped and
    any further token is reported as trailing garbage.

    Internally the class is a pair of state machines.  'lex_state' is the
    function that handles the next character; it returns True if it
    consumed the character and False if the character must be presented
    again to the (new) lexer state.  'parse_state' is the function that
    handles the next token.  Tokens are the punctuation characters
    themselves, the Python values False, True and None, Python numbers, or
    the token 'string' accompanied by the decoded string."""

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    # Names that are spelled differently across Python versions are resolved
    # once here, so that the methods below do not depend on which spelling
    # exists.
    try:
        __number_types = (int, long, float)
        __chr = unichr
    except NameError:
        __number_types = (int, float)
        __chr = chr
    # Bound on the magnitude of an exponent: sys.maxint where it exists,
    # otherwise sys.maxsize.
    __max_exp = getattr(sys, "maxint", None)
    if __max_exp is None:
        __max_exp = sys.maxsize

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # ---- Lexer: start state -------------------------------------------

    def __lex_start_space(self, c):
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, c):
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table for the first character of a token.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Handles the first character of a token; always consumes it."""
        action = Parser.__lex_start_actions.get(c, Parser.__lex_start_error)
        action(self, c)
        return True

    # ---- Lexer: keywords ----------------------------------------------

    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Turns the buffered word into a False, True or None token, or
        reports it as an invalid keyword."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            # 'c' ends the keyword and belongs to the next token.
            self.__lex_finish_keyword()
            return False

    # ---- Lexer: numbers -----------------------------------------------

    __number_re = re.compile(
        r"(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the buffered number text into a number token.

        A number whose value is an integer that fits the range checked
        below (at most 2**63 in magnitude when negative, less than 2**63
        otherwise) is delivered as an integer, even if it was written with
        a fraction or an exponent.  Anything else is delivered as a float,
        unless it overflows to infinity, which is an error.  Text that is
        not a valid JSON number is reported with a message that names the
        specific problem where one is recognized."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if exp is not None:
                exp = int(exp)
                if exp > Parser.__max_exp or exp < -Parser.__max_exp - 1:
                    self.__error("exponent outside valid range")
                    return

            # A fraction consisting only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # The number is significand * 10**pow10.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += exp

            if significand == 0:
                self.__parser_input(0)
                return
            if significand <= 2 ** 63:
                # Try to fold the power of ten into the significand so the
                # value can be represented exactly as an integer.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"[eE][-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            # 'c' ends the number and belongs to the next token.
            self.__lex_finish_number()
            return False

    # ---- Lexer: strings -----------------------------------------------

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the four hex digits in 's', which follow a
        \\u escape.  On a malformed or unsupported escape, records an error
        and returns None."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the characters they stand for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the buffered string body and delivers the
        result as a 'string' token.  On a bad escape, records an error and
        delivers nothing."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += Parser.__chr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        # The character after a backslash is buffered verbatim, so that an
        # escaped double quote does not end the string.
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Advances the position counters and passes 'c' to the current
        lexer state.  Returns True if 'c' was consumed, False if it must be
        presented again."""
        self.byte_number += 1
        if c == '\n':
            self.column_number = 0
            self.line_number += 1
        else:
            self.column_number += 1

        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # ---- Parser states --------------------------------------------------

    def __parse_start(self, token, string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, token, string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Delivers a completed token to the current parser state and
        resets the lexer for the next token."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    # ---- Parser stack ---------------------------------------------------

    def __put_value(self, value):
        """Stores 'value' in the container on top of the stack: under the
        current member name for an object, at the end for an array."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        if len(self.stack) < Parser.MAX_HEIGHT:
            # A nested container is attached to its parent right away and
            # filled in afterwards.
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        if len(self.stack) == 1:
            # The top-level value stays on the stack for finish().
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        if (token in [False, None, True]
            or type(token) in Parser.__number_types):
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            # Pushing a container selects the next state itself.
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current position, as the
        parse error and marks the parser done.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    # ---- Public interface -----------------------------------------------

    def feed(self, s):
        """Feeds the characters of 's' to the parser until 's' is exhausted
        or the parser is done.  Returns the number of characters of 's'
        that were consumed."""
        i = 0
        while not self.done and i < len(s):
            if self.__lex_input(s[i]):
                i += 1
        return i

    def is_done(self):
        """Returns true if the parser will not accept any more input,
        either because of an error or because the top-level value is
        complete and 'check_trailer' is false."""
        return self.done

    def finish(self):
        """Signals end of input.  Returns the parsed top-level value on
        success, or the error message string if parsing failed."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Flush a keyword or number that was still being accumulated.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error