sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10
  11 class RSpec:
  12
  13     def __init__(self, xml = None, xsd = None, NSURL = None):
  14         '''
  15         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  16         and reads python dicts and writes rspec xml
  17
  18         self.xsd = # Schema.  Can be local or remote file.
  19         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  20         self.rootNode = # root of the DOM
  21         self.dict = # dict of the RSpec.
  22         self.schemaDict = {} # dict of the Schema
  23         '''
  24
  25         self.xsd = xsd
  26         self.rootNode = None
  27         self.dict = {}
  28         self.schemaDict = {}
  29         self.NSURL = NSURL
  30         if xml:
  31             if type(xml) == file:
  32                 self.parseFile(xml)
  33             if type(xml) in StringTypes:
  34                 self.parseString(xml)
  35             self.dict = self.toDict()
  36         if xsd:
  37             self._parseXSD(self.NSURL + self.xsd)
  38
  39
  40     def _getText(self, nodelist):
  41         rc = ""
  42         for node in nodelist:
  43             if node.nodeType == node.TEXT_NODE:
  44                 rc = rc + node.data
  45         return rc
  46
  47     # The rspec is comprised of 2 parts, and 1 reference:
  48     # attributes/elements describe individual resources
  49     # complexTypes are used to describe a set of attributes/elements
  50     # complexTypes can include a reference to other complexTypes.
  51
  52
  53     def _getName(self, node):
  54         '''Gets name of node. If tag has no name, then return tag's localName'''
  55         name = None
  56         if not node.nodeName.startswith("#"):
  57             if node.localName:
  58                 name = node.localName
  59             elif node.attributes.has_key("name"):
  60                 name = node.attributes.get("name").value
  61         return name
  62
  63
  64     # Attribute.  {name : nameofattribute, {items: values})
  65     def _attributeDict(self, attributeDom):
  66         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  67         node = {} # parsed dict
  68         for attr in attributeDom.attributes.keys():
  69             node[attr] = attributeDom.attributes.get(attr).value
  70         return node
  71
  72
  73     def appendToDictOrCreate(self, dict, key, value):
  74         if (dict.has_key(key)):
  75             dict[key].append(value)
  76         else:
  77             dict[key]=[value]
  78         return dict
  79
  80     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  81         """
  82         convert an XML to a nested dict:
  83           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  84           * Terminal nodes (the rest) are nested dictionaries
  85         """
  86
  87         if (not nodeDom):
  88             nodeDom=self.rootNode
  89
  90         curNodeName = nodeDom.localName
  91
  92         if (nodeDom.hasChildNodes()):
  93             childdict={}
  94             for attribute in nodeDom.attributes.keys():
  95                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
  96             for child in nodeDom.childNodes[:-1]:
  97                 if (child.nodeValue):
  98                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
  99                 else:
 100                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 101
 102             child = nodeDom.childNodes[-1]
 103             if (child.nodeValue):
 104                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 105                 if (childdict):
 106                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 107             else:
 108                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 109         else:
 110             childdict={}
 111             for attribute in nodeDom.attributes.keys():
 112                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 113
 114             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 115
 116         if (parentdict is not None):
 117             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 118             return parentdict
 119         else:
 120             return siblingdict
 121
 122
 123
 124     def toDict(self, nodeDom = None):
 125         """
 126         convert this rspec to a dict and return it.
 127         """
 128         node = {}
 129         if not nodeDom:
 130              nodeDom = self.rootNode
 131
 132         elementName = nodeDom.nodeName
 133         if elementName and not elementName.startswith("#"):
 134             # attributes have tags and values.  get {tag: value}, else {type: value}
 135             node[elementName] = self._attributeDict(nodeDom)
 136             # resolve the child nodes.
 137             if nodeDom.hasChildNodes():
 138                 for child in nodeDom.childNodes:
 139                     childName = self._getName(child)
 140
 141                     # skip null children
 142                     if not childName: continue
 143
 144                     # initialize the possible array of children
 145                     if not node[elementName].has_key(childName): node[elementName][childName] = []
 146
 147                     if isinstance(child, minidom.Text):
 148                         # add if data is not empty
 149                         if child.data.strip():
 150                             node[elementName][childName].append(nextchild.data)
 151                     else:
 152                         childdict = self.toDict(child)
 153                         for value in childdict.values():
 154                             node[elementName][childName].append(value)
 155
 156         return node
 157
 158
 159     def toxml(self):
 160         """
 161         convert this rspec to an xml string and return it.
 162         """
 163         return self.rootNode.toxml()
 164
 165
 166     def toprettyxml(self):
 167         """
 168         print this rspec in xml in a pretty format.
 169         """
 170         return self.rootNode.toprettyxml()
 171
 172
 173     def parseFile(self, filename):
 174         """
 175         read a local xml file and store it as a dom object.
 176         """
 177         dom = minidom.parse(filename)
 178         self.rootNode = dom.childNodes[0]
 179
 180
 181     def parseString(self, xml):
 182         """
 183         read an xml string and store it as a dom object.
 184         """
 185         xml = xml.replace('\n', '').replace('\t', '').strip()
 186         dom = minidom.parseString(xml)
 187         self.rootNode = dom.childNodes[0]
 188
 189
 190     def _httpGetXSD(self, xsdURI):
 191         # split the URI into relevant parts
 192         host = xsdURI.split("/")[2]
 193         if xsdURI.startswith("https"):
 194             conn = httplib.HTTPSConnection(host,
 195                 httplib.HTTPSConnection.default_port)
 196         elif xsdURI.startswith("http"):
 197             conn = httplib.HTTPConnection(host,
 198                 httplib.HTTPConnection.default_port)
 199         conn.request("GET", xsdURI)
 200         # If we can't download the schema, raise an exception
 201         r1 = conn.getresponse()
 202         if r1.status != 200:
 203             raise Exception
 204         return r1.read().replace('\n', '').replace('\t', '').strip()
 205
 206
 207     def _parseXSD(self, xsdURI):
 208         """
 209         Download XSD from URL, or if file, read local xsd file and set schemaDict
 210         """
 211         # Since the schema definiton is a global namespace shared by and agreed upon by
 212         # others, this should probably be a URL.  Check for URL, download xsd, parse, or
 213         # if local file, use local file.
 214         schemaDom = None
 215         if xsdURI.startswith("http"):
 216             try:
 217                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 218             except Exception, e:
 219                 # logging.debug("%s: web file not found" % xsdURI)
 220                 # logging.debug("Using local file %s" % self.xsd")
 221                 print e
 222                 print "Can't find %s on the web. Continuing." % xsdURI
 223         if not schemaDom:
 224             if os.path.exists(xsdURI):
 225                 # logging.debug("using local copy.")
 226                 print "Using local %s" % xsdURI
 227                 schemaDom = minidom.parse(xsdURI)
 228             else:
 229                 raise Exception("Can't find xsd locally")
 230         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 231
 232
 233     def dict2dom(self, rdict, include_doc = False):
 234         """
 235         convert a dict object into a dom object.
 236         """
 237
 238         def elementNode(tagname, rd):
 239             element = minidom.Element(tagname)
 240             for key in rd.keys():
 241                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 242                     element.setAttribute(key, str(rd[key]))
 243                 elif isinstance(rd[key], dict):
 244                     child = elementNode(key, rd[key])
 245                     element.appendChild(child)
 246                 elif isinstance(rd[key], list):
 247                     for item in rd[key]:
 248                         if isinstance(item, dict):
 249                             child = elementNode(key, item)
 250                             element.appendChild(child)
 251                         elif isinstance(item, StringTypes) or isinstance(item, int):
 252                             child = minidom.Element(key)
 253                             text = minidom.Text()
 254                             text.data = item
 255                             child.appendChild(text)
 256                             element.appendChild(child)
 257             return element
 258
 259         # Minidom does not allow documents to have more then one
 260         # child, but elements may have many children. Because of
 261         # this, the document's root node will be the first key/value
 262         # pair in the dictionary.
 263         node = elementNode(rdict.keys()[0], rdict.values()[0])
 264         if include_doc:
 265             rootNode = minidom.Document()
 266             rootNode.appendChild(node)
 267         else:
 268             rootNode = node
 269         return rootNode
 270
 271
 272     def parseDict(self, rdict, include_doc = True):
 273         """
 274         Convert a dictionary into a dom object and store it.
 275         """
 276         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 277
 278
 279     def getDictsByTagName(self, tagname, dom = None):
 280         """
 281         Search the dom for all elements with the specified tagname
 282         and return them as a list of dicts
 283         """
 284         if not dom:
 285             dom = self.rootNode
 286         dicts = []
 287         doms = dom.getElementsByTagName(tagname)
 288         dictlist = [self.toDict(d) for d in doms]
 289         for item in dictlist:
 290             for value in item.values():
 291                 dicts.append(value)
 292         return dicts
 293
 294     def getDictByTagNameValue(self, tagname, value, dom = None):
 295         """
 296         Search the dom for the first element with the specified tagname
 297         and value and return it as a dict.
 298         """
 299         tempdict = {}
 300         if not dom:
 301             dom = self.rootNode
 302         dicts = self.getDictsByTagName(tagname, dom)
 303
 304         for rdict in dicts:
 305             if rdict.has_key('name') and rdict['name'] in [value]:
 306                 return rdict
 307
 308         return tempdict
 309
 310
 311     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 312         """
 313         Removes all elements where:
 314         1. tagname matches the element tag
 315         2. attribute matches the element attribte
 316         3. attribute value is in valuelist
 317         """
 318
 319         tempdict = {}
 320         if not dom:
 321             dom = self.rootNode
 322
 323         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 324             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 325                 dom.parentNode.removeChild(dom)
 326             if blacklist and dom.attributes.get(attribute).value in blacklist:
 327                 dom.parentNode.removeChild(dom)
 328
 329         if dom.hasChildNodes():
 330             for child in dom.childNodes:
 331                 self.filter(tagname, attribute, blacklist, whitelist, child)
 332
 333
 334     def merge(self, rspecs, tagname, dom=None):
 335         """
 336         Merge this rspec with the requested rspec based on the specified
 337         starting tag name. The start tag (and all of its children) will be merged
 338         """
 339         tempdict = {}
 340         if not dom:
 341             dom = self.rootNode
 342
 343         whitelist = []
 344         blacklist = []
 345
 346         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 347             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 348                 dom.parentNode.removeChild(dom)
 349             if blacklist and dom.attributes.get(attribute).value in blacklist:
 350                 dom.parentNode.removeChild(dom)
 351
 352         if dom.hasChildNodes():
 353             for child in dom.childNodes:
 354                 self.filter(tagname, attribute, blacklist, whitelist, child)
 355
 356     def validateDicts(self):
 357         types = {
 358             'EInt' : int,
 359             'EString' : str,
 360             'EByteArray' : list,
 361             'EBoolean' : bool,
 362             'EFloat' : float,
 363             'EDate' : date}
 364
 365
 366     def pprint(self, r = None, depth = 0):
 367         """
 368         Pretty print the dict
 369         """
 370         line = ""
 371         if r == None: r = self.dict
 372         # Set the dept
 373         for tab in range(0,depth): line += "    "
 374         # check if it's nested
 375         if type(r) == dict:
 376             for i in r.keys():
 377                 print line + "%s:" % i
 378                 self.pprint(r[i], depth + 1)
 379         elif type(r) in (tuple, list):
 380             for j in r: self.pprint(j, depth + 1)
 381         # not nested so just print.
 382         else:
 383             print line + "%s" %  r
 384
 385
 386
 387 class RecordSpec(RSpec):
 388
 389     root_tag = 'record'
 390     def parseDict(self, rdict, include_doc = False):
 391         """
 392         Convert a dictionary into a dom object and store it.
 393         """
 394         self.rootNode = self.dict2dom(rdict, include_doc)
 395
 396     def dict2dom(self, rdict, include_doc = False):
 397         record_dict = rdict
 398         if not len(rdict.keys()) == 1:
 399             record_dict = {self.root_tag : rdict}
 400         return RSpec.dict2dom(self, record_dict, include_doc)
 401
 402
 403 # vim:ts=4:expandtab
 404