ovs-test: A new tool for diagnosing connectivity and performance issues
[sliver-openvswitch.git] / python / ovs / json.py
1 # Copyright (c) 2010, 2011 Nicira Networks
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
__pychecker__ = 'no-stringiter'

# Maps the code point of each character that must be escaped inside a JSON
# string to its escaped spelling.  The short two-character escapes are listed
# explicitly; every other control character below U+0020 gets a six-character
# hexadecimal escape.
escapes = dict((ord(char), escaped) for char, escaped in [
    ('"', u"\\\""),
    ("\\", u"\\\\"),
    ("\b", u"\\b"),
    ("\f", u"\\f"),
    ("\n", u"\\n"),
    ("\r", u"\\r"),
    ("\t", u"\\t"),
])
for esc in range(32):
    # setdefault() leaves the short escapes registered above untouched.
    escapes.setdefault(esc, u"\\u%04x" % esc)

# Number of spaces added per nesting level in pretty-printed output.
SPACES_PER_LEVEL = 2
33
34
35 class _Serializer(object):
36     def __init__(self, stream, pretty, sort_keys):
37         self.stream = stream
38         self.pretty = pretty
39         self.sort_keys = sort_keys
40         self.depth = 0
41
42     def __serialize_string(self, s):
43         self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
44
45     def __indent_line(self):
46         if self.pretty:
47             self.stream.write('\n')
48             self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
49
50     def serialize(self, obj):
51         if obj is None:
52             self.stream.write(u"null")
53         elif obj is False:
54             self.stream.write(u"false")
55         elif obj is True:
56             self.stream.write(u"true")
57         elif type(obj) in (int, long):
58             self.stream.write(u"%d" % obj)
59         elif type(obj) == float:
60             self.stream.write("%.15g" % obj)
61         elif type(obj) == unicode:
62             self.__serialize_string(obj)
63         elif type(obj) == str:
64             self.__serialize_string(unicode(obj))
65         elif type(obj) == dict:
66             self.stream.write(u"{")
67
68             self.depth += 1
69             self.__indent_line()
70
71             if self.sort_keys:
72                 items = sorted(obj.items())
73             else:
74                 items = obj.iteritems()
75             for i, (key, value) in enumerate(items):
76                 if i > 0:
77                     self.stream.write(u",")
78                     self.__indent_line()
79                 self.__serialize_string(unicode(key))
80                 self.stream.write(u":")
81                 if self.pretty:
82                     self.stream.write(u' ')
83                 self.serialize(value)
84
85             self.stream.write(u"}")
86             self.depth -= 1
87         elif type(obj) in (list, tuple):
88             self.stream.write(u"[")
89             self.depth += 1
90
91             if obj:
92                 self.__indent_line()
93
94                 for i, value in enumerate(obj):
95                     if i > 0:
96                         self.stream.write(u",")
97                         self.__indent_line()
98                     self.serialize(value)
99
100             self.depth -= 1
101             self.stream.write(u"]")
102         else:
103             raise Exception("can't serialize %s as JSON" % obj)
104
105
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Writes 'obj' as JSON to 'stream', an object with a write() method.

    'pretty' selects indented multi-line output and 'sort_keys' selects
    sorted object members; both are passed through to _Serializer."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
108
109
def to_file(obj, name, pretty=False, sort_keys=True):
    """Writes 'obj' as JSON to the file named 'name', replacing any existing
    contents.  The file is closed even if serialization raises."""
    out_file = open(name, "w")
    try:
        to_stream(obj, out_file, pretty, sort_keys)
    finally:
        out_file.close()
116
117
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.

    'pretty' and 'sort_keys' have the same meaning as for to_stream()."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
124
125
def from_stream(stream):
    """Parses JSON read from 'stream' in chunks of up to 4096 characters.

    Reading stops at end of input or as soon as the parser declines to
    consume a whole chunk.  Returns whatever Parser.finish() returns: the
    parsed value on success, or an error message string on failure."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "" and parser.feed(chunk) == len(chunk):
        chunk = stream.read(4096)
    return parser.finish()
133
134
def from_file(name):
    """Parses the contents of the file named 'name' as JSON and returns the
    result of from_stream().  The file is closed even if parsing raises."""
    in_file = open(name, "r")
    try:
        return from_stream(in_file)
    finally:
        in_file.close()
141
142
143 def from_string(s):
144     try:
145         s = unicode(s, 'utf-8')
146     except UnicodeDecodeError, e:
147         seq = ' '.join(["0x%2x" % ord(c)
148                         for c in e.object[e.start:e.end] if ord(c) >= 0x80])
149         return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
150     p = Parser(check_trailer=True)
151     p.feed(s)
152     return p.finish()
153
154
class Parser(object):
    """Incremental JSON parser.

    Pass input text to feed(), in as many pieces as convenient, then call
    finish() to obtain the result.  finish() returns the parsed top-level
    value (a dict or a list) on success or an error message string on
    failure; parse errors are never raised as exceptions.

    If 'check_trailer' is false, the parser marks itself done as soon as the
    top-level value has been closed, so that feed() stops consuming input at
    that point.  If it is true, the parser keeps consuming input after the
    top-level value: whitespace is skipped and any further token is an
    error."""

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []
        self.member_name = None

        # Parse status.
        self.done = False
        self.error = None

    # Handlers for the first character of a token.  __lex_start() selects
    # one of them through the '__lex_start_actions' table below.

    def __lex_start_space(self, c):
        """Skips whitespace between tokens."""
        pass

    def __lex_start_alpha(self, c):
        """Begins accumulating a keyword that starts with 'c'."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Passes the single-character punctuation token 'c' to the
        parser."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begins accumulating a number that starts with 'c'."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, _):
        """Begins a quoted string.  The opening quote is not stored."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Reports 'c' as a character that cannot start a token.  Printable
        ASCII characters are shown literally, others by code point."""
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    # Maps the first character of a token to its handler.  Characters that
    # are missing from the table are handled by __lex_start_error().
    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Always consumes 'c'."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.  Uppercase letters are
    # included so that a miscapitalized keyword is reported whole.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Passes the keyword accumulated in 'buffer' to the parser, or
        reports an error if it is not "false", "true", or "null"."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the keyword."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent.
    __number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
            r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number accumulated in 'buffer' and passes it to the
        parser.

        A number whose value is an integer of magnitude less than 2**63 (or
        exactly -2**63) is passed as an integer, even if it was written with
        a fraction or an exponent; any other number is passed as a float.
        Reports an error for a malformed number or one out of range."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros does not affect the value.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the number as significand * 10**pow10, where
            # 'significand' is an integer.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += long(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2 ** 63:
                # Try to drive 'pow10' to zero without leaving the
                # representable integer range or dropping nonzero digits.
                while pow10 > 0 and significand <= 2 ** 63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    # Exact: the loop condition guarantees divisibility.
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2 ** 63) or
                     (sign and significand <= 2 ** 63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            # Not exactly representable as an integer in range.
            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        # The number is malformed.  The tests below only choose the most
        # specific error message.
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        # The lexer accepts both 'e' and 'E' as the exponent marker, so the
        # diagnosis must recognize both.
        elif re.search(r"[eE][-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Returns False, leaving 'c' to be
        lexed again, when 'c' ends the number."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the integer value of 's', the four hexadecimal digits of
        a backslash-u escape.  If 's' is too short, malformed, or "0000",
        reports an error and returns None."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Maps the character that follows a backslash to the character that the
    # two-character escape stands for.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escapes in the raw string contents accumulated in
        'buffer' and passes the resulting string to the parser.  Reports an
        error, and passes nothing, if an escape is malformed."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            # Copy the literal text up to the backslash, then decode the
            # escape that follows it.
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate; the two form one code point.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash inside a string.  Stores 'c'
        unexamined, so that an escaped quote does not end the string."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Updates the position counters for 'c' and passes it to the
        current lexer state.  Returns True if 'c' was consumed, False if it
        must be presented again."""
        self.byte_number += 1
        if c == '\n':
            self.column_number = 0
            self.line_number += 1
        else:
            self.column_number += 1

        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each is called with 'token', which is a punctuation
    # character, the literal 'string', or the value of a keyword or number,
    # and with 'string', which is the string contents when 'token' is
    # 'string' and None otherwise.

    def __parse_start(self, token, unused_string):
        """Expects the '{' or '[' that opens the top-level value."""
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, unused_token, unused_string):
        """State after the top-level value is closed: any token is an
        error."""
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        """Just after '{': expects '}' or the first member name."""
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        """Expects a member name."""
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, unused_string):
        """Expects the ':' between a member name and its value."""
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        """Expects a member value."""
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, unused_string):
        """After a member value: expects ',' or '}'."""
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        """Just after '[': expects ']' or the first element."""
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        """Expects an array element."""
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, unused_string):
        """After an array element: expects ',' or ']'."""
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and passes 'token' to the
        current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Adds 'value' to the container on top of the stack: under
        'member_name' for an object, at the end for an array."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Adds the new empty container 'new_json' to the enclosing
        container, if there is one, makes it the top of the stack, and
        switches to 'next_state'.  Reports an error instead if the stack
        already holds MAX_HEIGHT containers."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        """Starts parsing a new object."""
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        """Starts parsing a new array."""
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Finishes the container on top of the stack.

        The top-level container stays on the stack so that finish() can
        return it.  A nested container is removed, and parsing resumes in
        the container that encloses it."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is expected.

        A scalar is stored in the current container and the parser moves to
        'next_state'.  For '{' or '[', a nested container is started and the
        state set up for it is kept, so 'next_state' is not applied."""
        if token in [False, None, True] or type(token) in [int, long, float]:
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and marks the parser done.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Passes the characters of 's' to the lexer one at a time.

        Returns the number of characters consumed.  This is less than
        len(s) only if the parser became done, through an error or by
        completing the top-level value, before reaching the end of 's'."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i
            # A character that the lexer declines to consume ends one token
            # and is presented again as the start of the next.
            if self.__lex_input(s[i]):
                i += 1

    def is_done(self):
        """Returns true if the parser will consume no more input."""
        return self.done

    def finish(self):
        """Signals the end of input and returns the result.

        Returns the parsed top-level value on success, or an error message
        string if the input was invalid, empty, or incomplete."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # Feed a space to terminate a keyword or number in progress.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error