Minor description of the dependencies that Monitor has for connecting to:
[monitor.git] / util / PHPUnserialize.py
1 import types, string, re
2
3 """
4 Unserialize class for the PHP serialization format.
5
6 @version v0.4 BETA
7 @author Scott Hurring; scott at hurring dot com
8 @copyright Copyright (c) 2005 Scott Hurring
9 @license http://opensource.org/licenses/gpl-license.php GNU Public License
10 $Id: PHPUnserialize.py,v 1.1 2006/01/08 21:53:19 shurring Exp $
11
12 Most recent version can be found at:
13 http://hurring.com/code/python/phpserialize/
14
15 Usage:
16 # Create an instance of the unserialize engine
17 u = PHPUnserialize()
18 # unserialize some string into python data
19 data = u.unserialize(serialized_string)
20
21 Please see README.txt for more information.
22 """
23
class PHPUnserialize(object):
    """
    Class to unserialize something from the PHP Serialize format.

    Supported type tags (case-insensitive):
        i  -> int
        b  -> bool
        d  -> float
        n  -> None
        s  -> str
        a  -> list if the keys are exactly the ints 0..n-1, else dict

    Any other tag raises Exception.

    Usage:
    u = PHPUnserialize()
    data = u.unserialize(serialized_string)
    """

    # "name|" prefix that introduces each variable in session data.
    # Compiled once; used with match(data, pos) so no '^' anchor is needed.
    _SESSION_KEY = re.compile(r'(\w+)\|')

    # Converters for the scalar types that are written as "t:<text>;".
    _SCALARS = {
        'i': int,
        'b': lambda text: int(text) == 1,
        'd': float,
    }

    def __init__(self):
        pass

    def session_decode(self, data):
        """Decode session data of the form "name|<value>name|<value>...".

        Thanks to Ken Restivo for suggesting the addition
        of session_encode

        Returns a dict mapping each variable name to its unserialized
        value.  Decoding stops silently at the first position that does
        not start with "name|"; whatever was decoded so far is returned.
        """
        session = {}
        pos = 0
        while pos < len(data):
            m = self._SESSION_KEY.match(data, pos)
            if not m:
                # No more stuff to decode
                break
            (dtype, consumed, value) = self._unserialize(data, m.end())
            session[m.group(1)] = value
            # Continue right after the value that was just read.
            pos = m.end() + consumed
        return session

    def unserialize(self, data):
        """Unserialize the value at the start of 'data' and return it."""
        return self._unserialize(data, 0)[2]

    def _unserialize(self, data, offset=0):
        """
        Find the next token and unserialize it.
        Recurse on array.

        offset = raw offset from start of data

        return (type, chars_consumed, value)

        Raises Exception for an unknown type tag, a missing delimiter
        ("Invalid", from read_until) or a string shorter than its
        declared length.  ValueError from int()/float() propagates for
        malformed numbers.
        """
        dtype = data[offset:offset + 1].lower()

        # 't:' = 2 chars (also covers the complete 'N;' token)
        pos = offset + 2

        if dtype in self._SCALARS:
            (chars, raw) = self.read_until(data, pos, ';')
            # +1 for end semicolon
            pos += chars + 1
            value = self._SCALARS[dtype](raw)

        # n => None
        elif dtype == 'n':
            value = None

        # s => String
        elif dtype == 's':
            (chars, rawlength) = self.read_until(data, pos, ':')
            length = int(rawlength)
            # Skip the length digits, the colon after them and the
            # opening quote.
            pos += chars + 2

            # read_chars starts reading one char BEFORE the offset it is
            # given, hence the +1 here.
            (chars, value) = self.read_chars(data, pos + 1, length)

            # read_chars reports the number of chars requested, not the
            # number delivered, so compare the actual payload length.
            if len(value) != length:
                raise Exception("String length mismatch")

            # +2 for endquote semicolon
            pos += length + 2

        # array => List or Dict
        # PHP doesn't have tuples or lists, only arrays, so the original
        # Python type is lost when serializing.  See _dict_to_list for
        # how the result type is chosen.
        elif dtype == 'a':
            # How many keys does this array have?
            (chars, rawcount) = self.read_until(data, pos, ':')
            # Skip the count digits, the colon after them and the
            # opening brace.
            pos += chars + 2

            items = {}
            for _ in range(int(rawcount)):
                (ktype, kchars, key) = self._unserialize(data, pos)
                pos += kchars
                (vtype, vchars, val) = self._unserialize(data, pos)
                pos += vchars
                items[key] = val

            # +1 for closing brace
            pos += 1
            value = self._dict_to_list(items)

        # I don't know how to unserialize this
        else:
            raise Exception("Unknown / Unhandled data type (%s)!" % dtype)

        return (dtype, pos - offset, value)

    def _dict_to_list(self, mapping):
        """
        Return the values of 'mapping' as a list when its keys are
        exactly the ints 0..n-1; otherwise return 'mapping' unchanged.

        Empty {} and empty [] are ambiguous; an empty mapping is
        returned as an empty list.
        """
        keys = list(mapping)
        # Check types first: sorting mixed int/str keys raises TypeError
        # on Python 3.  'type(k) is int' deliberately rejects bool.
        if any(type(k) is not int for k in keys):
            return mapping
        keys.sort()
        if keys != list(range(len(keys))):
            return mapping
        return [mapping[k] for k in keys]

    def read_until(self, data, offset, stopchar):
        """
        Read from data[offset] until you encounter some char 'stopchar'.

        'stopchar' is expected to be a single character.

        return (chars_read, text) where text excludes 'stopchar'.
        Raises Exception("Invalid") if 'stopchar' does not occur at or
        after 'offset'.
        """
        end = data.find(stopchar, offset)
        if end < 0:
            # Consumed all the characters and havent found stopchar
            raise Exception("Invalid")

        # (chars_read, data)
        return (end - offset, data[offset:end])

    def read_chars(self, data, offset, length):
        """
        Read 'length' number of chars starting at data[offset - 1].

        NOTE: the read begins one position BEFORE 'offset'; callers
        pass the index of the first wanted char plus one.  This is kept
        as-is for backward compatibility.

        return (length, text).  The first element is the number of
        chars requested; text may be shorter if 'data' runs out.
        """
        buf = [data[offset + i - 1:offset + i] for i in range(0, length)]

        # (chars_read, data)
        return (len(buf), "".join(buf))
227
228