sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10
  11 class RSpec:
  12
  13     def __init__(self, xml = None, xsd = None, NSURL = None):
  14         '''
  15         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  16         and reads python dicts and writes rspec xml
  17
  18         self.xsd = # Schema.  Can be local or remote file.
  19         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  20         self.rootNode = # root of the DOM
  21         self.dict = # dict of the RSpec.
  22         self.schemaDict = {} # dict of the Schema
  23         '''
  24
  25         self.xsd = xsd
  26         self.rootNode = None
  27         self.dict = {}
  28         self.schemaDict = {}
  29         self.NSURL = NSURL
  30         if xml:
  31             if type(xml) == file:
  32                 self.parseFile(xml)
  33             if type(xml) in StringTypes:
  34                 self.parseString(xml)
  35             self.dict = self.toDict()
  36         if xsd:
  37             self._parseXSD(self.NSURL + self.xsd)
  38
  39
  40     def _getText(self, nodelist):
  41         rc = ""
  42         for node in nodelist:
  43             if node.nodeType == node.TEXT_NODE:
  44                 rc = rc + node.data
  45         return rc
  46
  47     # The rspec is comprised of 2 parts, and 1 reference:
  48     # attributes/elements describe individual resources
  49     # complexTypes are used to describe a set of attributes/elements
  50     # complexTypes can include a reference to other complexTypes.
  51
  52
  53     def _getName(self, node):
  54         '''Gets name of node. If tag has no name, then return tag's localName'''
  55         name = None
  56         if not node.nodeName.startswith("#"):
  57             if node.localName:
  58                 name = node.localName
  59             elif node.attributes.has_key("name"):
  60                 name = node.attributes.get("name").value
  61         return name
  62
  63
  64     # Attribute.  {name : nameofattribute, {items: values})
  65     def _attributeDict(self, attributeDom):
  66         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  67         node = {} # parsed dict
  68         for attr in attributeDom.attributes.keys():
  69             node[attr] = attributeDom.attributes.get(attr).value
  70         return node
  71
  72
  73     def appendToDictOrCreate(self, dict, key, value):
  74         if (dict.has_key(key)):
  75             dict[key].append(value)
  76         else:
  77             dict[key]=[value]
  78         return dict
  79
  80     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  81         """
  82         convert an XML to a nested dict:
  83           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  84           * Terminal nodes (the rest) are nested dictionaries
  85         """
  86
  87         if (not nodeDom):
  88             nodeDom=self.rootNode
  89
  90         curNodeName = nodeDom.localName
  91
  92         if (nodeDom.hasChildNodes()):
  93             childdict={}
  94             for attribute in nodeDom.attributes.keys():
  95                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
  96             for child in nodeDom.childNodes[:-1]:
  97                 if (child.nodeValue):
  98                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
  99                 else:
 100                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 101
 102             child = nodeDom.childNodes[-1]
 103             if (child.nodeValue):
 104                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 105                 if (childdict):
 106                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 107             else:
 108                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 109         else:
 110             childdict={}
 111             for attribute in nodeDom.attributes.keys():
 112                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 113
 114             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 115
 116         if (parentdict is not None):
 117             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 118             return parentdict
 119         else:
 120             return siblingdict
 121
 122
 123
 124     def toDict(self, nodeDom = None):
 125         """
 126         convert this rspec to a dict and return it.
 127         """
 128         node = {}
 129         if not nodeDom:
 130              nodeDom = self.rootNode
 131
 132         elementName = nodeDom.nodeName
 133         if elementName and not elementName.startswith("#"):
 134             # attributes have tags and values.  get {tag: value}, else {type: value}
 135             node[elementName] = self._attributeDict(nodeDom)
 136             # resolve the child nodes.
 137             if nodeDom.hasChildNodes():
 138                 for child in nodeDom.childNodes:
 139                     childName = self._getName(child)
 140
 141                     # skip null children
 142                     if not childName: continue
 143
 144                     # initialize the possible array of children
 145                     if not node[elementName].has_key(childName): node[elementName][childName] = []
 146
 147                     if isinstance(child, minidom.Text):
 148                         # add if data is not empty
 149                         if child.data.strip():
 150                             node[elementName][childName].append(nextchild.data)
 151                     elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 152                         for nextchild in child.childNodes:
 153                             node[elementName][childName].append(nextchild.data)
 154                     else:
 155                         childdict = self.toDict(child)
 156                         for value in childdict.values():
 157                             node[elementName][childName].append(value)
 158
 159         return node
 160
 161
 162     def toxml(self):
 163         """
 164         convert this rspec to an xml string and return it.
 165         """
 166         return self.rootNode.toxml()
 167
 168
 169     def toprettyxml(self):
 170         """
 171         print this rspec in xml in a pretty format.
 172         """
 173         return self.rootNode.toprettyxml()
 174
 175
 176     def parseFile(self, filename):
 177         """
 178         read a local xml file and store it as a dom object.
 179         """
 180         dom = minidom.parse(filename)
 181         self.rootNode = dom.childNodes[0]
 182
 183
 184     def parseString(self, xml):
 185         """
 186         read an xml string and store it as a dom object.
 187         """
 188         xml = xml.replace('\n', '').replace('\t', '').strip()
 189         dom = minidom.parseString(xml)
 190         self.rootNode = dom.childNodes[0]
 191
 192
 193     def _httpGetXSD(self, xsdURI):
 194         # split the URI into relevant parts
 195         host = xsdURI.split("/")[2]
 196         if xsdURI.startswith("https"):
 197             conn = httplib.HTTPSConnection(host,
 198                 httplib.HTTPSConnection.default_port)
 199         elif xsdURI.startswith("http"):
 200             conn = httplib.HTTPConnection(host,
 201                 httplib.HTTPConnection.default_port)
 202         conn.request("GET", xsdURI)
 203         # If we can't download the schema, raise an exception
 204         r1 = conn.getresponse()
 205         if r1.status != 200:
 206             raise Exception
 207         return r1.read().replace('\n', '').replace('\t', '').strip()
 208
 209
 210     def _parseXSD(self, xsdURI):
 211         """
 212         Download XSD from URL, or if file, read local xsd file and set schemaDict
 213         """
 214         # Since the schema definiton is a global namespace shared by and agreed upon by
 215         # others, this should probably be a URL.  Check for URL, download xsd, parse, or
 216         # if local file, use local file.
 217         schemaDom = None
 218         if xsdURI.startswith("http"):
 219             try:
 220                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 221             except Exception, e:
 222                 # logging.debug("%s: web file not found" % xsdURI)
 223                 # logging.debug("Using local file %s" % self.xsd")
 224                 print e
 225                 print "Can't find %s on the web. Continuing." % xsdURI
 226         if not schemaDom:
 227             if os.path.exists(xsdURI):
 228                 # logging.debug("using local copy.")
 229                 print "Using local %s" % xsdURI
 230                 schemaDom = minidom.parse(xsdURI)
 231             else:
 232                 raise Exception("Can't find xsd locally")
 233         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 234
 235
 236     def dict2dom(self, rdict, include_doc = False):
 237         """
 238         convert a dict object into a dom object.
 239         """
 240
 241         def elementNode(tagname, rd):
 242             element = minidom.Element(tagname)
 243             for key in rd.keys():
 244                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 245                     element.setAttribute(key, str(rd[key]))
 246                 elif isinstance(rd[key], dict):
 247                     child = elementNode(key, rd[key])
 248                     element.appendChild(child)
 249                 elif isinstance(rd[key], list):
 250                     for item in rd[key]:
 251                         if isinstance(item, dict):
 252                             child = elementNode(key, item)
 253                             element.appendChild(child)
 254                         elif isinstance(item, StringTypes) or isinstance(item, int):
 255                             child = minidom.Element(key)
 256                             text = minidom.Text()
 257                             text.data = item
 258                             child.appendChild(text)
 259                             element.appendChild(child)
 260             return element
 261
 262         # Minidom does not allow documents to have more then one
 263         # child, but elements may have many children. Because of
 264         # this, the document's root node will be the first key/value
 265         # pair in the dictionary.
 266         node = elementNode(rdict.keys()[0], rdict.values()[0])
 267         if include_doc:
 268             rootNode = minidom.Document()
 269             rootNode.appendChild(node)
 270         else:
 271             rootNode = node
 272         return rootNode
 273
 274
 275     def parseDict(self, rdict, include_doc = True):
 276         """
 277         Convert a dictionary into a dom object and store it.
 278         """
 279         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 280
 281
 282     def getDictsByTagName(self, tagname, dom = None):
 283         """
 284         Search the dom for all elements with the specified tagname
 285         and return them as a list of dicts
 286         """
 287         if not dom:
 288             dom = self.rootNode
 289         dicts = []
 290         doms = dom.getElementsByTagName(tagname)
 291         dictlist = [self.toDict(d) for d in doms]
 292         for item in dictlist:
 293             for value in item.values():
 294                 dicts.append(value)
 295         return dicts
 296
 297     def getDictByTagNameValue(self, tagname, value, dom = None):
 298         """
 299         Search the dom for the first element with the specified tagname
 300         and value and return it as a dict.
 301         """
 302         tempdict = {}
 303         if not dom:
 304             dom = self.rootNode
 305         dicts = self.getDictsByTagName(tagname, dom)
 306
 307         for rdict in dicts:
 308             if rdict.has_key('name') and rdict['name'] in [value]:
 309                 return rdict
 310
 311         return tempdict
 312
 313
 314     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 315         """
 316         Removes all elements where:
 317         1. tagname matches the element tag
 318         2. attribute matches the element attribte
 319         3. attribute value is in valuelist
 320         """
 321
 322         tempdict = {}
 323         if not dom:
 324             dom = self.rootNode
 325
 326         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 327             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 328                 dom.parentNode.removeChild(dom)
 329             if blacklist and dom.attributes.get(attribute).value in blacklist:
 330                 dom.parentNode.removeChild(dom)
 331
 332         if dom.hasChildNodes():
 333             for child in dom.childNodes:
 334                 self.filter(tagname, attribute, blacklist, whitelist, child)
 335
 336
 337     def merge(self, rspecs, tagname, dom=None):
 338         """
 339         Merge this rspec with the requested rspec based on the specified
 340         starting tag name. The start tag (and all of its children) will be merged
 341         """
 342         tempdict = {}
 343         if not dom:
 344             dom = self.rootNode
 345
 346         whitelist = []
 347         blacklist = []
 348
 349         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 350             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 351                 dom.parentNode.removeChild(dom)
 352             if blacklist and dom.attributes.get(attribute).value in blacklist:
 353                 dom.parentNode.removeChild(dom)
 354
 355         if dom.hasChildNodes():
 356             for child in dom.childNodes:
 357                 self.filter(tagname, attribute, blacklist, whitelist, child)
 358
 359     def validateDicts(self):
 360         types = {
 361             'EInt' : int,
 362             'EString' : str,
 363             'EByteArray' : list,
 364             'EBoolean' : bool,
 365             'EFloat' : float,
 366             'EDate' : date}
 367
 368
 369     def pprint(self, r = None, depth = 0):
 370         """
 371         Pretty print the dict
 372         """
 373         line = ""
 374         if r == None: r = self.dict
 375         # Set the dept
 376         for tab in range(0,depth): line += "    "
 377         # check if it's nested
 378         if type(r) == dict:
 379             for i in r.keys():
 380                 print line + "%s:" % i
 381                 self.pprint(r[i], depth + 1)
 382         elif type(r) in (tuple, list):
 383             for j in r: self.pprint(j, depth + 1)
 384         # not nested so just print.
 385         else:
 386             print line + "%s" %  r
 387
 388
 389
 390 class RecordSpec(RSpec):
 391
 392     root_tag = 'record'
 393     def parseDict(self, rdict, include_doc = False):
 394         """
 395         Convert a dictionary into a dom object and store it.
 396         """
 397         self.rootNode = self.dict2dom(rdict, include_doc)
 398
 399     def dict2dom(self, rdict, include_doc = False):
 400         record_dict = rdict
 401         if not len(rdict.keys()) == 1:
 402             record_dict = {self.root_tag : rdict}
 403         return RSpec.dict2dom(self, record_dict, include_doc)
 404
 405
 406 # vim:ts=4:expandtab
 407