1 import types, string, re
4 Unserialize class for the PHP serialization format.
7 @author Scott Hurring; scott at hurring dot com
8 @copyright Copyright (c) 2005 Scott Hurring
9 @license http://opensource.org/licenses/gpl-license.php GNU Public License
10 $Id: PHPUnserialize.py,v 1.1 2006/01/08 21:53:19 shurring Exp $
12 Most recent version can be found at:
13 http://hurring.com/code/python/phpserialize/
16 # Create an instance of the unserialize engine
18 # unserialize some string into python data
19 data = u.unserialize(serialized_string)
21 Please see README.txt for more information.
class PHPUnserialize(object):
    """
    Class to unserialize something from the PHP Serialize format.

    Usage:
    u = PHPUnserialize()
    data = u.unserialize(serialized_string)

    Handled type tags (case-insensitive): ``i`` (int), ``b`` (bool),
    ``d`` (float), ``n`` (None), ``s`` (string) and ``a`` (array).
    Any other tag raises ``Exception``.

    NOTE(review): string lengths are compared against the number of
    Python characters read.  PHP writes byte lengths, so non-ASCII text
    handed in as decoded unicode may not line up -- confirm which
    encoding callers pass in.
    """

    # Matches the "name|" prefix that introduces one session variable.
    _SESSION_KEY = re.compile(r'^(\w+)\|')

    # Scalar tags whose payload runs up to the next ';', with converters.
    _SCALARS = {
        'i': int,
        'b': lambda text: int(text) == 1,
        'd': float,
    }

    def session_decode(self, data):
        """Decode a PHP session string (``name|value`` pairs back to back).

        Thanks to Ken Restivo for suggesting this addition.

        Returns a dict mapping each variable name to its decoded value.
        Decoding stops quietly at the first position that does not start
        with ``name|``; whatever was decoded up to there is returned.
        """
        session = {}
        while data:
            m = self._SESSION_KEY.match(data)
            if not m:
                # No more stuff to decode
                break
            key = m.group(1)
            # Skip the variable name and the '|' separator.
            offset = len(key) + 1
            (dtype, dataoffset, value) = self._unserialize(data, offset)
            offset = offset + dataoffset
            session[key] = value
            # Drop what was just consumed and look for the next variable.
            data = data[offset:]
        return session

    def unserialize(self, data):
        """Return the Python value encoded at the start of ``data``."""
        return self._unserialize(data, 0)[2]

    def _unserialize(self, data, offset=0):
        """
        Find the next token and unserialize it.
        Recurses for arrays (each key and each value is a token).

        offset = raw offset from start of data

        return (type, offset, value) where type is the lower-cased tag
        character and offset is the number of characters consumed,
        counted from the ``offset`` argument.

        Raises Exception on an unknown tag, on an unterminated field
        ("Invalid") and on a string shorter than its declared length.
        """
        # str.lower() instead of string.lower(): the latter is gone in
        # Python 3.
        dtype = data[offset:offset + 1].lower()

        # 't:' = 2 chars
        dataoffset = offset + 2

        if dtype in self._SCALARS:
            # int => Integer, bool => Boolean, double => Floating Point
            (chars, readdata) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = self._SCALARS[dtype](readdata)

        elif dtype == 'n':
            # 'N;' carries no payload; the two chars are already counted.
            value = None

        elif dtype == 's':
            (chars, stringlength) = self.read_until(data, dataoffset, ':')
            stringlength = int(stringlength)
            # Skip the length digits, the ':' after them and the
            # opening quote.
            dataoffset += chars + 2

            # read_chars starts one position before the offset it is
            # given, hence the +1 here.
            (chars, value) = self.read_chars(data, dataoffset + 1, stringlength)
            if chars != stringlength:
                raise Exception("String length mismatch")
            # +2 for endquote semicolon
            dataoffset += chars + 2

        elif dtype == 'a':
            # If you originally serialized a Tuple or List, it will come
            # back as a Dict unless its keys are exactly 0..n-1.  PHP
            # doesn't have tuples or lists, only arrays - so the original
            # type of the array is lost when serializing.

            # How many keys does this list have?
            (chars, keys) = self.read_until(data, dataoffset, ':')
            # Skip the count digits, the ':' after them and the '{'.
            dataoffset += chars + 2

            readdata = {}
            # Loop through and fetch this number of key/value pairs
            for _ in range(int(keys)):
                (ktype, kchars, key) = self._unserialize(data, dataoffset)
                dataoffset += kchars

                # Read value of the key
                (vtype, vchars, item) = self._unserialize(data, dataoffset)
                dataoffset += vchars

                # Set the list element
                readdata[key] = item

            # +1 for the closing brace
            dataoffset += 1
            value = self._dict_to_list(readdata)

        else:
            # I don't know how to unserialize this
            raise Exception("Unknown / Unhandled data type (%s)!" % dtype)

        return (dtype, dataoffset - offset, value)

    @staticmethod
    def _dict_to_list(mapping):
        """Turn ``mapping`` into a list when its keys are the ints 0..n-1.

        If all dict keys are increasing ints from zero the values are
        returned as a list in key order; otherwise the original dict is
        returned unchanged.

        Empty {} and empty [] are ambiguous; an empty array becomes [].
        NOTE(review): that default is a judgement call -- confirm it
        suits callers that expect a dict.
        """
        # The exact-type test keeps bool keys (True == 1) from passing
        # as list indexes; sorting only happens once every key is known
        # to be an int, so mixed key types never get compared.
        only_ints = all(type(key) is int for key in mapping)
        if only_ints and sorted(mapping) == list(range(len(mapping))):
            return [mapping[index] for index in range(len(mapping))]
        return mapping

    def read_until(self, data, offset, stopchar):
        """
        Read from data[offset] until you encounter some char 'stopchar'.

        return (chars_read, text); the stop char itself is not included
        in either.  Raises Exception("Invalid") when 'stopchar' does not
        occur at or after 'offset'.
        """
        end = data.find(stopchar, offset)
        if end == -1:
            # Consumed all the characters and haven't found the stop char
            raise Exception("Invalid")
        return (end - offset, data[offset:end])

    def read_chars(self, data, offset, length):
        """
        Read up to 'length' chars, starting ONE position before 'offset'.

        Callers pass an offset that already accounts for the starting
        quote char, so reading begins at data[offset - 1].

        return (chars_read, text).  chars_read is the number of chars
        actually obtained; it is smaller than 'length' when 'data' ends
        early, which lets the caller detect truncated input.
        """
        if length <= 0:
            return (0, "")
        # Clamp so an offset of 0 cannot wrap round to the end of data.
        start = max(offset - 1, 0)
        end = max(offset - 1 + length, 0)
        chunk = data[start:end]
        return (len(chunk), chunk)