sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10
  11 class RSpec:
  12
  13     def __init__(self, xml = None, xsd = None, NSURL = None):
  14         '''
  15         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  16         and reads python dicts and writes rspec xml
  17
  18         self.xsd = # Schema.  Can be local or remote file.
  19         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  20         self.rootNode = # root of the DOM
  21         self.dict = # dict of the RSpec.
  22         self.schemaDict = {} # dict of the Schema
  23         '''
  24
  25         self.xsd = xsd
  26         self.rootNode = None
  27         self.dict = {}
  28         self.schemaDict = {}
  29         self.NSURL = NSURL
  30         if xml:
  31             if type(xml) == file:
  32                 self.parseFile(xml)
  33             if type(xml) == str:
  34                 self.parseString(xml)
  35             self.dict = self.toDict()
  36         if xsd:
  37             self._parseXSD(self.NSURL + self.xsd)
  38
  39
  40     def _getText(self, nodelist):
  41         rc = ""
  42         for node in nodelist:
  43             if node.nodeType == node.TEXT_NODE:
  44                 rc = rc + node.data
  45         return rc
  46
  47     # The rspec is comprised of 2 parts, and 1 reference:
  48     # attributes/elements describe individual resources
  49     # complexTypes are used to describe a set of attributes/elements
  50     # complexTypes can include a reference to other complexTypes.
  51
  52
  53     def _getName(self, node):
  54         '''Gets name of node. If tag has no name, then return tag's localName'''
  55         name = None
  56         if not node.nodeName.startswith("#"):
  57             if node.localName:
  58                 name = node.localName
  59             elif node.attributes.has_key("name"):
  60                 name = node.attributes.get("name").value
  61         return name
  62
  63
  64     # Attribute.  {name : nameofattribute, {items: values})
  65     def _attributeDict(self, attributeDom):
  66         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  67         node = {} # parsed dict
  68         for attr in attributeDom.attributes.keys():
  69             node[attr] = attributeDom.attributes.get(attr).value
  70         return node
  71
  72
  73     def appendToDictOrCreate(self, dict, key, value):
  74         if (dict.has_key(key)):
  75             dict[key].append(value)
  76         else:
  77             dict[key]=[value]
  78         return dict
  79
  80     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  81         """
  82         convert an XML to a nested dict:
  83           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  84           * Terminal nodes (the rest) are nested dictionaries
  85         """
  86
  87         if (not nodeDom):
  88             nodeDom=self.rootNode
  89
  90         curNodeName = nodeDom.localName
  91
  92         if (nodeDom.hasChildNodes()):
  93             childdict={}
  94             for attribute in nodeDom.attributes.keys():
  95                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
  96             for child in nodeDom.childNodes[:-1]:
  97                 if (child.nodeValue):
  98                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
  99                 else:
 100                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 101
 102             child = nodeDom.childNodes[-1]
 103             if (child.nodeValue):
 104                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 105                 if (childdict):
 106                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 107             else:
 108                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 109         else:
 110             childdict={}
 111             for attribute in nodeDom.attributes.keys():
 112                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 113
 114             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 115
 116         if (parentdict is not None):
 117             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 118             return parentdict
 119         else:
 120             return siblingdict
 121
 122
 123
 124     def toDict(self, nodeDom = None):
 125         """
 126         convert this rspec to a dict and return it.
 127         """
 128         node = {}
 129         if not nodeDom:
 130              nodeDom = self.rootNode
 131
 132         elementName = nodeDom.nodeName
 133         if elementName and not elementName.startswith("#"):
 134             # attributes have tags and values.  get {tag: value}, else {type: value}
 135             node[elementName] = self._attributeDict(nodeDom)
 136             # resolve the child nodes.
 137             if nodeDom.hasChildNodes():
 138                 for child in nodeDom.childNodes:
 139                     childName = self._getName(child)
 140                     # skip null children
 141                     if not childName:
 142                         continue
 143                     # initialize the possible array of children
 144                     if not node[elementName].has_key(childName):
 145                         node[elementName][childName] = []
 146                     # if child node has text child nodes
 147                     # append the children to the array as strings
 148                     if child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 149                         for nextchild in child.childNodes:
 150                             node[elementName][childName].append(nextchild.data)
 151                     # convert element child node to dict
 152                     else:
 153                         childdict = self.toDict(child)
 154                         for value in childdict.values():
 155                             node[elementName][childName].append(value)
 156                     #node[childName].append(self.toDict(child))
 157         return node
 158
 159
 160     def toxml(self):
 161         """
 162         convert this rspec to an xml string and return it.
 163         """
 164         return self.rootNode.toxml()
 165
 166
 167     def toprettyxml(self):
 168         """
 169         print this rspec in xml in a pretty format.
 170         """
 171         return self.rootNode.toprettyxml()
 172
 173
 174     def parseFile(self, filename):
 175         """
 176         read a local xml file and store it as a dom object.
 177         """
 178         dom = minidom.parse(filename)
 179         self.rootNode = dom.childNodes[0]
 180
 181
 182     def parseString(self, xml):
 183         """
 184         read an xml string and store it as a dom object.
 185         """
 186         xml = xml.replace('\n', '').replace('\t', '').strip()
 187         dom = minidom.parseString(xml)
 188         self.rootNode = dom.childNodes[0]
 189
 190
 191     def _httpGetXSD(self, xsdURI):
 192         # split the URI into relevant parts
 193         host = xsdURI.split("/")[2]
 194         if xsdURI.startswith("https"):
 195             conn = httplib.HTTPSConnection(host,
 196                 httplib.HTTPSConnection.default_port)
 197         elif xsdURI.startswith("http"):
 198             conn = httplib.HTTPConnection(host,
 199                 httplib.HTTPConnection.default_port)
 200         conn.request("GET", xsdURI)
 201         # If we can't download the schema, raise an exception
 202         r1 = conn.getresponse()
 203         if r1.status != 200:
 204             raise Exception
 205         return r1.read().replace('\n', '').replace('\t', '').strip()
 206
 207
 208     def _parseXSD(self, xsdURI):
 209         """
 210         Download XSD from URL, or if file, read local xsd file and set schemaDict
 211         """
 212         # Since the schema definiton is a global namespace shared by and agreed upon by
 213         # others, this should probably be a URL.  Check for URL, download xsd, parse, or
 214         # if local file, use local file.
 215         schemaDom = None
 216         if xsdURI.startswith("http"):
 217             try:
 218                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 219             except Exception, e:
 220                 # logging.debug("%s: web file not found" % xsdURI)
 221                 # logging.debug("Using local file %s" % self.xsd")
 222                 print e
 223                 print "Can't find %s on the web. Continuing." % xsdURI
 224         if not schemaDom:
 225             if os.path.exists(xsdURI):
 226                 # logging.debug("using local copy.")
 227                 print "Using local %s" % xsdURI
 228                 schemaDom = minidom.parse(xsdURI)
 229             else:
 230                 raise Exception("Can't find xsd locally")
 231         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 232
 233
 234     def dict2dom(self, rdict, include_doc = False):
 235         """
 236         convert a dict object into a dom object.
 237         """
 238
 239         def elementNode(tagname, rd):
 240             element = minidom.Element(tagname)
 241             for key in rd.keys():
 242                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 243                     element.setAttribute(key, str(rd[key]))
 244                 elif isinstance(rd[key], dict):
 245                     child = elementNode(key, rd[key])
 246                     element.appendChild(child)
 247                 elif isinstance(rd[key], list):
 248                     for item in rd[key]:
 249                         if isinstance(item, dict):
 250                             child = elementNode(key, item)
 251                             element.appendChild(child)
 252                         elif isinstance(item, StringTypes) or isinstance(item, int):
 253                             child = minidom.Element(key)
 254                             text = minidom.Text()
 255                             text.data = item
 256                             child.appendChild(text)
 257                             element.appendChild(child)
 258             return element
 259
 260         # Minidom does not allow documents to have more then one
 261         # child, but elements may have many children. Because of
 262         # this, the document's root node will be the first key/value
 263         # pair in the dictionary.
 264         node = elementNode(rdict.keys()[0], rdict.values()[0])
 265         if include_doc:
 266             rootNode = minidom.Document()
 267             rootNode.appendChild(node)
 268         else:
 269             rootNode = node
 270         return rootNode
 271
 272
 273     def parseDict(self, rdict, include_doc = True):
 274         """
 275         Convert a dictionary into a dom object and store it.
 276         """
 277         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 278
 279
 280     def getDictsByTagName(self, tagname, dom = None):
 281         """
 282         Search the dom for all elements with the specified tagname
 283         and return them as a list of dicts
 284         """
 285         if not dom:
 286             dom = self.rootNode
 287         dicts = []
 288         doms = dom.getElementsByTagName(tagname)
 289         dictlist = [self.toDict(d) for d in doms]
 290         for item in dictlist:
 291             for value in item.values():
 292                 dicts.append(value)
 293         return dicts
 294
 295     def getDictByTagNameValue(self, tagname, value, dom = None):
 296         """
 297         Search the dom for the first element with the specified tagname
 298         and value and return it as a dict.
 299         """
 300         tempdict = {}
 301         if not dom:
 302             dom = self.rootNode
 303         dicts = self.getDictsByTagName(tagname, dom)
 304
 305         for rdict in dicts:
 306             if rdict.has_key('name') and rdict['name'] in [value]:
 307                 return rdict
 308
 309         return tempdict
 310
 311
 312     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 313         """
 314         Removes all elements where:
 315         1. tagname matches the element tag
 316         2. attribute matches the element attribte
 317         3. attribute value is in valuelist
 318         """
 319
 320         tempdict = {}
 321         if not dom:
 322             dom = self.rootNode
 323
 324         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 325             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 326                 dom.parentNode.removeChild(dom)
 327             if blacklist and dom.attributes.get(attribute).value in blacklist:
 328                 dom.parentNode.removeChild(dom)
 329
 330         if dom.hasChildNodes():
 331             for child in dom.childNodes:
 332                 self.filter(tagname, attribute, blacklist, whitelist, child)
 333
 334
 335     def merge(self, rspecs, tagname, dom=None):
 336         """
 337         Merge this rspec with the requested rspec based on the specified
 338         starting tag name. The start tag (and all of its children) will be merged
 339         """
 340         tempdict = {}
 341         if not dom:
 342             dom = self.rootNode
 343
 344         whitelist = []
 345         blacklist = []
 346
 347         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 348             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 349                 dom.parentNode.removeChild(dom)
 350             if blacklist and dom.attributes.get(attribute).value in blacklist:
 351                 dom.parentNode.removeChild(dom)
 352
 353         if dom.hasChildNodes():
 354             for child in dom.childNodes:
 355                 self.filter(tagname, attribute, blacklist, whitelist, child)
 356
 357     def validateDicts(self):
 358         types = {
 359             'EInt' : int,
 360             'EString' : str,
 361             'EByteArray' : list,
 362             'EBoolean' : bool,
 363             'EFloat' : float,
 364             'EDate' : date}
 365
 366
 367     def pprint(self, r = None, depth = 0):
 368         """
 369         Pretty print the dict
 370         """
 371         line = ""
 372         if r == None: r = self.dict
 373         # Set the dept
 374         for tab in range(0,depth): line += "    "
 375         # check if it's nested
 376         if type(r) == dict:
 377             for i in r.keys():
 378                 print line + "%s:" % i
 379                 self.pprint(r[i], depth + 1)
 380         elif type(r) in (tuple, list):
 381             for j in r: self.pprint(j, depth + 1)
 382         # not nested so just print.
 383         else:
 384             print line + "%s" %  r
 385
 386
 387
 388 class RecordSpec(RSpec):
 389
 390     root_tag = 'record'
 391     def parseDict(self, rdict, include_doc = False):
 392         """
 393         Convert a dictionary into a dom object and store it.
 394         """
 395         self.rootNode = self.dict2dom(rdict, include_doc)
 396
 397     def dict2dom(self, rdict, include_doc = False):
 398         record_dict = rdict
 399         if not len(rdict.keys()) == 1:
 400             record_dict = {self.root_tag : rdict}
 401         return RSpec.dict2dom(self, record_dict, include_doc)
 402
 403
 404 # vim:ts=4:expandtab
 405