# Origin: util/PHPUnserialize.py in monitor.git (git.onelab.eu),
# blob d939ce0379a53c8e0b724c6ef63aa56038a11d81.
"""
Unserialize class for the PHP serialization format.

@version v0.4 BETA
@author Scott Hurring; scott at hurring dot com
@copyright Copyright (c) 2005 Scott Hurring
@license http://opensource.org/licenses/gpl-license.php GNU Public License
$Id: PHPUnserialize.py,v 1.1 2006/01/08 21:53:19 shurring Exp $

Most recent version can be found at:
http://hurring.com/code/python/phpserialize/

Usage:
# Create an instance of the unserialize engine
u = PHPUnserialize()
# unserialize some string into python data
data = u.unserialize(serialized_string)

Please see README.txt for more information.
"""

import re
import string
import types

# A session entry is "<name>|" immediately followed by one serialized value.
_SESSION_KEY_RE = re.compile(r'^(\w+)\|')


class PHPUnserialize(object):
    """
    Class to unserialize something from the PHP Serialize format.

    Usage:
        u = PHPUnserialize()
        data = u.unserialize(serialized_string)
    """

    def __init__(self):
        pass

    def session_decode(self, data):
        """
        Decode a string of concatenated "name|<serialized value>" entries.

        Thanks to Ken Restivo for suggesting the addition
        of session_encode

        data = the session string to decode

        return a dict mapping each name to its unserialized value.
        Decoding stops at the first position that does not start with
        "name|"; whatever was decoded up to that point is returned.
        """
        session = {}
        while data:
            m = _SESSION_KEY_RE.match(data)
            if not m:
                # No more stuff to decode
                break
            key = m.group(1)
            # Skip the name and the '|' separator
            start = len(key) + 1
            (dtype, consumed, value) = self._unserialize(data, start)
            session[key] = value
            data = data[start + consumed:]
        return session

    def unserialize(self, data):
        """
        Unserialize the single value found at the start of 'data'.

        return the Python value (int, bool, float, None, str, list or dict).
        Raises Exception on an unknown type tag or malformed input.
        """
        return self._unserialize(data, 0)[2]

    def _unserialize(self, data, offset=0):
        """
        Find the next token and unserialize it.
        Recurse on array.

        offset = raw offset from start of data

        return (type, chars_consumed, value) where 'type' is the lowercased
        one-character type tag and 'chars_consumed' is counted from 'offset'.

        Raises Exception for an unknown type tag, a missing terminator or
        a string shorter than its declared length.

        NOTE: PHP records string lengths in bytes, while this reader counts
        characters of 'data'; multi-byte text passed in as unicode may
        therefore be mis-read.
        """
        dtype = data[offset:offset + 1].lower()

        # Every token starts with 't:' = 2 chars (for null, 'N;' is the
        # whole token).
        pos = offset + 2

        # int => Integer, bool => Boolean, double => Floating Point
        if dtype in ('i', 'b', 'd'):
            (chars, raw) = self.read_until(data, pos, ';')
            # +1 for end semicolon
            pos += chars + 1
            if dtype == 'i':
                value = int(raw)
            elif dtype == 'b':
                value = (int(raw) == 1)
            else:
                value = float(raw)

        # n => None
        elif dtype == 'n':
            value = None

        # s => String
        elif dtype == 's':
            (chars, rawlength) = self.read_until(data, pos, ':')
            length = int(rawlength)
            # +2 for the colon after the length field and the start quote
            pos += chars + 2

            (chars, value) = self.read_chars(data, pos, length)
            if chars != length:
                # The data ended before the declared length was reached
                raise Exception("String length mismatch")
            # +2 for endquote semicolon
            pos += chars + 2

        # array => Dict (or List)
        # If you originally serialized a Tuple or List, it will
        # be unserialized as a Dict unless its keys are exactly 0..n-1.
        # PHP doesn't have tuples or lists, only arrays - so everything
        # has to get converted into an array when serializing and the
        # original type of the array is lost
        elif dtype == 'a':
            # How many keys does this array have?
            (chars, rawcount) = self.read_until(data, pos, ':')
            # +2 for the colon after the count field and the open brace
            pos += chars + 2

            items = {}
            # Loop through and fetch this number of key/value pairs
            for _ in range(int(rawcount)):
                (ktype, kchars, key) = self._unserialize(data, pos)
                pos += kchars
                (vtype, vchars, val) = self._unserialize(data, pos)
                pos += vchars
                items[key] = val

            # +1 for the closing brace
            pos += 1
            value = self._dict_to_list(items)

        # I don't know how to unserialize this
        else:
            raise Exception("Unknown / Unhandled data type (%s)!" % dtype)

        return (dtype, pos - offset, value)

    @staticmethod
    def _dict_to_list(mapping):
        """
        Return 'mapping' as a list when its keys are exactly the ints
        0..n-1, otherwise return 'mapping' itself.

        Empty {} and empty [] are ambiguous; an empty mapping becomes [].
        """
        size = len(mapping)
        for key in mapping:
            # Dict keys are distinct, so 'size' int keys that all lie in
            # [0, size) are necessarily 0..size-1; no sorting is needed,
            # which also avoids comparing int keys with string keys.
            if type(key) is not int or not 0 <= key < size:
                return mapping
        return [mapping[i] for i in range(size)]

    def read_until(self, data, offset, stopchar):
        """
        Read from data[offset] until you encounter some char 'stopchar'.

        return (chars_read, text) where 'text' excludes 'stopchar'.
        Raises Exception("Invalid") when 'stopchar' does not occur at or
        after 'offset'.
        """
        end = data.find(stopchar, offset)
        if end < 0:
            # Consumed all the characters and haven't found 'stopchar'
            raise Exception("Invalid")
        return (end - offset, data[offset:end])

    def read_chars(self, data, offset, length):
        """
        Read 'length' number of chars from data[offset].

        return (chars_read, text); 'chars_read' is smaller than 'length'
        when 'data' ends early, so callers can detect truncation.
        """
        chunk = data[offset:offset + length]
        return (len(chunk), chunk)