--- /dev/null
+import types, string
+
+"""
+Serialize class for the PHP serialization format.
+
+@version v0.4 BETA
+@author Scott Hurring; scott at hurring dot com
+@copyright Copyright (c) 2005 Scott Hurring
+@license http://opensource.org/licenses/gpl-license.php GNU Public License
+$Id: PHPSerialize.py,v 1.1 2006/01/08 21:53:19 shurring Exp $
+
+Most recent version can be found at:
+http://hurring.com/code/python/phpserialize/
+
+Usage:
+# Create an instance of the serialize engine
+s = PHPSerialize()
+# serialize some python data into a string
+serialized_string = s.serialize(string)
+# encode a session list (php's session_encode)
+serialized_string = s.session_encode(list)
+
+See README.txt for more information.
+"""
+
+class PHPSerialize(object):
+ """
+ Class to serialize data using the PHP Serialize format.
+
+ Usage:
+ serialized_string = PHPSerialize().serialize(data)
+ serialized_string = PHPSerialize().session_encode(list)
+ """
+
+ def __init__(self):
+ pass
+
+ def session_encode(self, session):
+ """Thanks to Ken Restivo for suggesting the addition
+ of session_encode
+ """
+ out = ""
+ for (k,v) in session.items():
+ out = out + "%s|%s" % (k, self.serialize(v))
+ return out
+
+ def serialize(self, data):
+ return self.serialize_value(data)
+
+ def is_int(self, data):
+ """
+ Determine if a string var looks like an integer
+ TODO: Make this do what PHP does, instead of a hack
+ """
+ try:
+ int(data)
+ return True
+ except:
+ return False
+
+ def serialize_key(self, data):
+ """
+ Serialize a key, which follows different rules than when
+ serializing values. Many thanks to Todd DeLuca for pointing
+ out that keys are serialized differently than values!
+
+ From http://us2.php.net/manual/en/language.types.array.php
+ A key may be either an integer or a string.
+ If a key is the standard representation of an integer, it will be
+ interpreted as such (i.e. "8" will be interpreted as int 8,
+ while "08" will be interpreted as "08").
+ Floats in key are truncated to integer.
+ """
+ # Integer, Long, Float, Boolean => integer
+ if type(data) is types.IntType or type(data) is types.LongType \
+ or type(data) is types.FloatType or type(data) is types.BooleanType:
+ return "i:%s;" % int(data)
+
+ # String => string or String => int (if string looks like int)
+ elif type(data) is types.StringType:
+ if self.is_int(data):
+ return "i:%s;" % int(data)
+ else:
+ return "s:%i:\"%s\";" % (len(data), data);
+
+ # None / NULL => empty string
+ elif type(data) is types.NoneType:
+ return "s:0:\"\";"
+
+ # I dont know how to serialize this
+ else:
+ raise Exception("Unknown / Unhandled key type (%s)!" % type(data))
+
+
+ def serialize_value(self, data):
+ """
+ Serialize a value.
+ """
+
+ # Integer => integer
+ if type(data) is types.IntType:
+ return "i:%s;" % data
+
+ # Float, Long => double
+ elif type(data) is types.FloatType or type(data) is types.LongType:
+ return "d:%s;" % data
+
+ # String => string or String => int (if string looks like int)
+ # Thanks to Todd DeLuca for noticing that PHP strings that
+ # look like integers are serialized as ints by PHP
+ elif type(data) is types.StringType:
+ if self.is_int(data):
+ return "i:%s;" % int(data)
+ else:
+ return "s:%i:\"%s\";" % (len(data), data);
+
+ # None / NULL
+ elif type(data) is types.NoneType:
+ return "N;";
+
+ # Tuple and List => array
+ # The 'a' array type is the only kind of list supported by PHP.
+ # array keys are automagically numbered up from 0
+ elif type(data) is types.ListType or type(data) is types.TupleType:
+ i = 0
+ out = []
+ # All arrays must have keys
+ for k in data:
+ out.append(self.serialize_key(i))
+ out.append(self.serialize_value(k))
+ i += 1
+ return "a:%i:{%s}" % (len(data), "".join(out))
+
+ # Dict => array
+ # Dict is the Python analogy of a PHP array
+ elif type(data) is types.DictType:
+ out = []
+ for k in data:
+ out.append(self.serialize_key(k))
+ out.append(self.serialize_value(data[k]))
+ return "a:%i:{%s}" % (len(data), "".join(out))
+
+ # Boolean => bool
+ elif type(data) is types.BooleanType:
+ return "b:%i;" % (data == 1)
+
+ # I dont know how to serialize this
+ else:
+ raise Exception("Unknown / Unhandled data type (%s)!" % type(data))
--- /dev/null
+import types, string, re
+
+"""
+Unserialize class for the PHP serialization format.
+
+@version v0.4 BETA
+@author Scott Hurring; scott at hurring dot com
+@copyright Copyright (c) 2005 Scott Hurring
+@license http://opensource.org/licenses/gpl-license.php GNU Public License
+$Id: PHPUnserialize.py,v 1.1 2006/01/08 21:53:19 shurring Exp $
+
+Most recent version can be found at:
+http://hurring.com/code/python/phpserialize/
+
+Usage:
+# Create an instance of the unserialize engine
+u = PHPUnserialize()
+# unserialize some string into python data
+data = u.unserialize(serialized_string)
+
+Please see README.txt for more information.
+"""
+
+class PHPUnserialize(object):
+ """
+ Class to unserialize something from the PHP Serialize format.
+
+ Usage:
+ u = PHPUnserialize()
+ data = u.unserialize(serialized_string)
+ """
+
+ def __init__(self):
+ pass
+
+ def session_decode(self, data):
+ """Thanks to Ken Restivo for suggesting the addition
+ of session_encode
+ """
+ session = {}
+ while len(data) > 0:
+ m = re.match('^(\w+)\|', data)
+ if m:
+ key = m.group(1)
+ offset = len(key)+1
+ (dtype, dataoffset, value) = self._unserialize(data, offset)
+ offset = offset + dataoffset
+ data = data[offset:]
+ session[key] = value
+ else:
+ # No more stuff to decode
+ return session
+
+ return session
+
+ def unserialize(self, data):
+ return self._unserialize(data, 0)[2]
+
+ def _unserialize(self, data, offset=0):
+ """
+ Find the next token and unserialize it.
+ Recurse on array.
+
+ offset = raw offset from start of data
+
+ return (type, offset, value)
+ """
+
+ buf = []
+ dtype = string.lower(data[offset:offset+1])
+
+ #print "# dtype =", dtype
+
+ # 't:' = 2 chars
+ dataoffset = offset + 2
+ typeconvert = lambda x : x
+ chars = datalength = 0
+
+ # int => Integer
+ if dtype == 'i':
+ typeconvert = lambda x : int(x)
+ (chars, readdata) = self.read_until(data, dataoffset, ';')
+ # +1 for end semicolon
+ dataoffset += chars + 1
+
+ # bool => Boolean
+ elif dtype == 'b':
+ typeconvert = lambda x : (int(x) == 1)
+ (chars, readdata) = self.read_until(data, dataoffset, ';')
+ # +1 for end semicolon
+ dataoffset += chars + 1
+
+ # double => Floating Point
+ elif dtype == 'd':
+ typeconvert = lambda x : float(x)
+ (chars, readdata) = self.read_until(data, dataoffset, ';')
+ # +1 for end semicolon
+ dataoffset += chars + 1
+
+ # n => None
+ elif dtype == 'n':
+ readdata = None
+
+ # s => String
+ elif dtype == 's':
+ (chars, stringlength) = self.read_until(data, dataoffset, ':')
+ # +2 for colons around length field
+ dataoffset += chars + 2
+
+ # +1 for start quote
+ (chars, readdata) = self.read_chars(data, dataoffset+1, int(stringlength))
+ # +2 for endquote semicolon
+ dataoffset += chars + 2
+
+ if chars != int(stringlength) != int(readdata):
+ raise Exception("String length mismatch")
+
+ # array => Dict
+ # If you originally serialized a Tuple or List, it will
+ # be unserialized as a Dict. PHP doesn't have tuples or lists,
+ # only arrays - so everything has to get converted into an array
+ # when serializing and the original type of the array is lost
+ elif dtype == 'a':
+ readdata = {}
+
+ # If all dict keys are increasing ints from zero,
+ # then make it a list.
+ # Else,
+ # just return the original dict
+ def is_list(keys):
+ # list with no elements
+ if len(keys) == 0:
+ return True
+ # list with one element: 0
+ if len(keys) == 1:
+ if type(keys[0]) == int and keys[0] == 0:
+ return True
+ else:
+ return False
+
+ # all other cases
+ for i in range(1,len(keys)):
+ x = keys[i-1]
+ y = keys[i]
+ if type(x) == int and type(y) == int and y-x == 1:
+ continue
+ else:
+ return False
+ return True
+
+ # Empty {} and empty [] are ambiguous.
+ # A default could either be configured or assumed to be a list?
+ def dict_to_list(dict):
+
+ keys = dict.keys()
+ keys.sort()
+
+ if is_list(keys):
+ return [dict[k] for k in keys]
+ else:
+ return dict
+
+ typeconvert = lambda x : dict_to_list(x)
+
+ # How many keys does this list have?
+ (chars, keys) = self.read_until(data, dataoffset, ':')
+ # +2 for colons around length field
+ dataoffset += chars + 2
+
+ # Loop through and fetch this number of key/value pairs
+ for i in range(0, int(keys)):
+ # Read the key
+ (ktype, kchars, key) = self._unserialize(data, dataoffset)
+ dataoffset += kchars
+ #print "Key(%i) = (%s, %i, %s) %i" % (i, ktype, kchars, key, dataoffset)
+
+ # Read value of the key
+ (vtype, vchars, value) = self._unserialize(data, dataoffset)
+ dataoffset += vchars
+ #print "Value(%i) = (%s, %i, %s) %i" % (i, vtype, vchars, value, dataoffset)
+
+ # Set the list element
+ readdata[key] = value
+
+ # +1 for end semicolon
+ dataoffset += 1
+ #chars = int(dataoffset) - start
+
+ # I don't know how to unserialize this
+ else:
+ raise Exception("Unknown / Unhandled data type (%s)!" % dtype)
+
+
+ return (dtype, dataoffset-offset, typeconvert(readdata))
+
+ def read_until(self, data, offset, stopchar):
+ """
+ Read from data[offset] until you encounter some char 'stopchar'.
+ """
+ buf = []
+ char = data[offset:offset+1]
+ i = 2
+ while char != stopchar:
+ # Consumed all the characters and havent found ';'
+ if i+offset > len(data):
+ raise Exception("Invalid")
+ buf.append(char)
+ char = data[offset+(i-1):offset+i]
+ i += 1
+
+ # (chars_read, data)
+ return (len(buf), "".join(buf))
+
+ def read_chars(self, data, offset, length):
+ """
+ Read 'length' number of chars from data[offset].
+ """
+ buf = []
+ # Account for the starting quote char
+ #offset += 1
+ for i in range(0, length):
+ char = data[offset+(i-1):offset+i]
+ buf.append(char)
+
+ # (chars_read, data)
+ return (len(buf), "".join(buf))
+
+