Global replace of Nicira Networks.
[sliver-openvswitch.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
__pychecker__ = 'no-stringiter'

# Table mapping a character's code point to the text that must replace it
# inside a JSON quoted string.  Every control character (code points 0
# through 31) first receives the generic six-character \uXXXX form; the
# characters that have a dedicated short escape, plus the double quote and
# the backslash, are then overridden with that short form.
escapes = {}
for esc in range(32):
    escapes[esc] = u"\\u%04x" % esc
escapes.update({ord('"'): u"\\\"",
                ord("\\"): u"\\\\",
                ord("\b"): u"\\b",
                ord("\f"): u"\\f",
                ord("\n"): u"\\n",
                ord("\r"): u"\\r",
                ord("\t"): u"\\t"})

# Number of spaces added per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2
33
34
35 class _Serializer(object):
36     def __init__(self, stream, pretty, sort_keys):
37         self.stream = stream
38         self.pretty = pretty
39         self.sort_keys = sort_keys
40         self.depth = 0
41
42     def __serialize_string(self, s):
43         self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
44
45     def __indent_line(self):
46         if self.pretty:
47             self.stream.write('\n')
48             self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
49
50     def serialize(self, obj):
51         if obj is None:
52             self.stream.write(u"null")
53         elif obj is False:
54             self.stream.write(u"false")
55         elif obj is True:
56             self.stream.write(u"true")
57         elif type(obj) in (int, long):
58             self.stream.write(u"%d" % obj)
59         elif type(obj) == float:
60             self.stream.write("%.15g" % obj)
61         elif type(obj) == unicode:
62             self.__serialize_string(obj)
63         elif type(obj) == str:
64             self.__serialize_string(unicode(obj))
65         elif type(obj) == dict:
66             self.stream.write(u"{")
67
68             self.depth += 1
69             self.__indent_line()
70
71             if self.sort_keys:
72                 items = sorted(obj.items())
73             else:
74                 items = obj.iteritems()
75             for i, (key, value) in enumerate(items):
76                 if i > 0:
77                     self.stream.write(u",")
78                     self.__indent_line()
79                 self.__serialize_string(unicode(key))
80                 self.stream.write(u":")
81                 if self.pretty:
82                     self.stream.write(u' ')
83                 self.serialize(value)
84
85             self.stream.write(u"}")
86             self.depth -= 1
87         elif type(obj) in (list, tuple):
88             self.stream.write(u"[")
89             self.depth += 1
90
91             if obj:
92                 self.__indent_line()
93
94                 for i, value in enumerate(obj):
95                     if i > 0:
96                         self.stream.write(u",")
97                         self.__indent_line()
98                     self.serialize(value)
99
100             self.depth -= 1
101             self.stream.write(u"]")
102         else:
103             raise Exception("can't serialize %s as JSON" % obj)
104
105
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream', which must
    have a write() method.  'pretty' and 'sort_keys' are passed through to
    the serializer unchanged."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
108
109
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name', which is opened
    in "w" mode.  The file is closed even if serialization raises."""
    out = open(name, "w")
    try:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
    finally:
        out.close()
116
117
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as JSON text, built in an in-memory
    buffer."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty=pretty, sort_keys=sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
124
125
def from_stream(stream):
    """Parses JSON read from 'stream' in chunks of up to 4096 characters.

    Reading stops at end of input (an empty read) or as soon as the parser
    declines to consume a whole chunk.  Returns whatever Parser.finish()
    returns: the parsed value, or an error message string."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "" and parser.feed(chunk) == len(chunk):
        chunk = stream.read(4096)
    return parser.finish()
133
134
def from_file(name):
    """Parses the JSON in the file named 'name', which is opened in "r"
    mode, and returns the result of from_stream().  The file is closed even
    if parsing raises."""
    source = open(name, "r")
    try:
        result = from_stream(source)
    finally:
        source.close()
    return result
141
142
def from_string(s):
    """Parses the JSON text in 's' and returns the result.

    's' may be a byte string encoded in UTF-8 or a unicode object.  On
    success the parsed value is returned.  On failure an error message
    string is returned instead; no exception is raised for malformed JSON or
    for input that is not valid UTF-8."""
    # Decoding an object that is already unicode raises TypeError, so only
    # decode when 's' is not unicode yet.
    if not isinstance(s, unicode):
        try:
            s = unicode(s, 'utf-8')
        except UnicodeDecodeError:
            # Fetch the exception through sys.exc_info() so that this
            # handler parses the same way on every Python version.
            e = sys.exc_info()[1]
            seq = ' '.join(["0x%2x" % ord(c)
                            for c in e.object[e.start:e.end]
                            if ord(c) >= 0x80])
            return ("not a valid UTF-8 string: invalid UTF-8 sequence %s"
                    % seq)
    p = Parser(check_trailer=True)
    p.feed(s)
    return p.finish()
153
154
class Parser(object):
    """Incremental JSON parser built from two hand-written state machines.

    Text is pushed in with feed(), one chunk at a time.  A lexer state
    machine (the __lex_* methods) groups characters into tokens and hands
    each token to a parser state machine (the __parse_* methods), which
    assembles nested dicts and lists.  finish() reports the outcome: the
    parsed value on success, or an error message string on failure.

    The top-level value must be an object or an array.  If 'check_trailer'
    is false, parsing stops as soon as the top-level value is complete.  If
    it is true, input keeps being consumed and any token that follows the
    top-level value is reported as trailing garbage.

    Both state machines store their current state as a function taken from
    the class (for example Parser.__lex_start) and call it with the instance
    passed explicitly."""

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""        # Text of the token being accumulated.
        self.line_number = 0    # Position counters, used in error messages.
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []         # Containers currently open, outermost first.
        self.member_name = None  # Name of the object member being parsed.

        # Parse status.
        self.done = False
        self.error = None

    # Handlers for the first character of a token.  Each one is selected
    # through the __lex_start_actions table below.

    def __lex_start_space(self, c):
        # Whitespace between tokens is ignored.
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        # Punctuation is a complete token by itself.
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        # The opening quote is not part of the string's content.
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Dispatch table from a token's first character to its handler.  A
    # character with no entry is handled by __lex_start_error.
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Always consumes 'c'."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword once it has started.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        # Only the three JSON literals are valid keywords.
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  A letter extends the keyword and
        is consumed; any other character ends the keyword and is left to be
        examined again by the next lexer state."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                             r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number text in self.buffer into a token.

        A value that is mathematically an integer in the range -2**63
        through 2**63 - 1 is passed to the parser as an integer.  Anything
        else is passed as a float, unless it overflows to infinity, which is
        an error.  Malformed text produces an error naming the problem."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction made only of zeros adds nothing, so drop it.  This
            # lets a number such as 1.0 be returned as an integer.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the number as significand * 10**pow10, with an integer
            # significand.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += long(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to bring pow10 to zero without losing digits.  The
                # first loop stops once the significand exceeds the integer
                # range, so it runs only a bounded number of times.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Exact because the significand is a multiple of 10.
                    # Floor division keeps it an integer.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"[eE][-+]?([^0-9]|$)", s):
            # The exponent marker may be written in either case.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Accumulates every character that
        can appear in a number; validation happens when the number ends.
        The character that ends the number is not consumed."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the integer value of the four hex digits in 's'.  If 's'
        is too short, malformed, or all zeros, records an error and returns
        None instead."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes and the character each one stands for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the raw string content held in self.buffer
        and passes the result to the parser as a 'string' token.  Records an
        error instead if an escape is invalid."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                # No more escapes; the rest is literal text.
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            # \uXXXX escape.  __lex_4hex has already recorded the error when
            # it returns None.
            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by a
                # second \u escape holding the trailing surrogate.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state for the character right after a backslash.  The
        character is stored without inspection, so that an escaped quote
        does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Passes 'c' to the current lexer state.  Returns True if the
        character was consumed, or False if it must be presented again to
        the state that is now current."""
        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each one receives a token and, for 'string' tokens
    # only, the decoded string.  A token is one of the punctuation
    # characters, the marker 'string', or a literal value (False, True,
    # None, or a number).

    def __parse_start(self, token, unused_string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        # Right after '{', an immediate '}' is allowed (empty object).
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        # Right after '[', an immediate ']' is allowed (empty array).
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Delivers a finished token to the parser and resets the lexer so
        that it is ready for the next token."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the innermost open container: under the current
        member name for an object, or at the end for an array."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Opens the new container 'new_json', attaching it to its parent
        container if there is one, and moves to 'next_state'.  Records an
        error instead if MAX_HEIGHT containers are already open."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Closes the innermost open container."""
        if len(self.stack) == 1:
            # The top-level value is complete.  It stays on the stack so that
            # finish() can return it.
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.  After a
        scalar value the parser moves to 'next_state'; a '{' or '[' opens a
        nested container instead."""
        # The integers 0 and 1 compare equal to False and True, so they
        # already satisfy the first test; they are values either way.
        if token in [False, None, True] or type(token) in [int, long, float]:
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            # The push (or the error) has already decided the next state.
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, and
        stops the parser.  Only the first error is kept; later calls do
        nothing."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Pushes the characters of 's' into the parser.

        Returns the number of characters consumed.  This is less than
        len(s) when the parser finished partway through 's', either because
        of an error or, if 'check_trailer' is false, because the top-level
        value was completed."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i

            c = s[i]
            # A character that is not consumed is presented again on the next
            # iteration, to the lexer state that has become current.
            if self.__lex_input(c):
                self.byte_number += 1
                if c == '\n':
                    self.column_number = 0
                    self.line_number += 1
                else:
                    self.column_number += 1

                i += 1

    def is_done(self):
        """Returns true once the parser has stopped accepting input."""
        return self.done

    def finish(self):
        """Signals the end of input and returns the outcome: the parsed
        top-level dict or list on success, or an error message string on
        failure."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A keyword or number may still be pending; a space ends it.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error