sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10
  11 class Rspec:
  12
  13     def __init__(self, xml = None, xsd = None, NSURL = None):
  14         '''
  15         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  16         and reads python dicts and writes rspec xml
  17
  18         self.xsd = # Schema.  Can be local or remote file.
  19         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  20         self.rootNode = # root of the DOM
  21         self.dict = # dict of the RSpec.
  22         self.schemaDict = {} # dict of the Schema
  23         '''
  24
  25         self.xsd = xsd
  26         self.rootNode = None
  27         self.dict = {}
  28         self.schemaDict = {}
  29         self.NSURL = NSURL
  30         if xml:
  31             if type(xml) == file:
  32                 self.parseFile(xml)
  33             if type(xml) == str:
  34                 self.parseString(xml)
  35             self.dict = self.toDict()
  36         if xsd:
  37             self._parseXSD(self.NSURL + self.xsd)
  38
  39
  40     def _getText(self, nodelist):
  41         rc = ""
  42         for node in nodelist:
  43             if node.nodeType == node.TEXT_NODE:
  44                 rc = rc + node.data
  45         return rc
  46
  47     # The rspec is comprised of 2 parts, and 1 reference:
  48     # attributes/elements describe individual resources
  49     # complexTypes are used to describe a set of attributes/elements
  50     # complexTypes can include a reference to other complexTypes.
  51
  52
  53     def _getName(self, node):
  54         '''Gets name of node. If tag has no name, then return tag's localName'''
  55         name = None
  56         if not node.nodeName.startswith("#"):
  57             if node.localName:
  58                 name = node.localName
  59             elif node.attributes.has_key("name"):
  60                 name = node.attributes.get("name").value
  61         return name
  62
  63
    # Attributes: an element's attributes are flattened to {attributename: value, ...}
  65     def _attributeDict(self, attributeDom):
  66         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  67         node = {} # parsed dict
  68         for attr in attributeDom.attributes.keys():
  69             node[attr] = attributeDom.attributes.get(attr).value
  70         return node
  71
  72
  73     def appendToDictOrCreate(self, dict, key, value):
  74         if (dict.has_key(key)):
  75             dict[key].append(value)
  76         else:
  77             dict[key]=[value]
  78         return dict
  79
  80     def toDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  81         """
  82         convert an XML to a nested dict:
  83           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  84           * Terminal nodes (the rest) are nested dictionaries
  85         """
  86
  87         if (not nodeDom):
  88             nodeDom=self.rootNode
  89
  90         curNodeName = nodeDom.localName
  91
  92         if (nodeDom.hasChildNodes()):
  93             childdict={}
  94             for child in nodeDom.childNodes[:-1]:
  95                 if (child.nodeValue):
  96                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
  97                 else:
  98                     childdict = self.toDict(child, None, childdict, curNodeName)
  99
 100             child = nodeDom.childNodes[-1]
 101             if (child.nodeValue):
 102                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 103             else:
 104                 siblingdict = self.toDict(child, siblingdict, childdict, curNodeName)
 105
 106             # Keep the attributes separate from text nodes
 107             attrdict={}
 108             for attribute in nodeDom.attributes.keys():
 109                 attrdict = self.appendToDictOrCreate(attrdict, attribute, nodeDom.getAttribute(attribute))
 110             if (attrdict):
 111                 self.appendToDictOrCreate(siblingdict, curNodeName, attrdict)
 112         else:
 113             self.appendToDictOrCreate(siblingdict, curNodeName, [])
 114
 115         if (parentdict is not None):
 116             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 117             return parentdict
 118         else:
 119             return siblingdict
 120
 121
 122
 123 #    def toDict(self, nodeDom = None):
 124 #        """
 125 #        convert this rspec to a dict and return it.
 126 #        """
 127 #        node = {}
 128 #        if not nodeDom:
 129 #             nodeDom = self.rootNode
 130 #
 131 #        elementName = nodeDom.nodeName
 132 #        if elementName and not elementName.startswith("#"):
 133 #            # attributes have tags and values.  get {tag: value}, else {type: value}
 134 #            node[elementName] = self._attributeDict(nodeDom)
 135 #            # resolve the child nodes.
 136 #            if nodeDom.hasChildNodes():
 137 #                for child in nodeDom.childNodes:
 138 #                    childName = self._getName(child)
 139 #                    # skip null children
 140 #                    if not childName:
 141 #                        continue
 142 #                    # initialize the possible array of children
 143 #                    if not node[elementName].has_key(childName):
 144 #                        node[elementName][childName] = []
 145 #                    # if child node has text child nodes
 146 #                    # append the children to the array as strings
 147 #                    if child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 148 #                        for nextchild in child.childNodes:
 149 #                            node[elementName][childName].append(nextchild.data)
 150 #                    # convert element child node to dict
 151 #                    else:
 152 #                        childdict = self.toDict(child)
 153 #                        for value in childdict.values():
 154 #                            node[elementName][childName].append(value)
 155 #                    #node[childName].append(self.toDict(child))
 156 #        return node
 157
 158
 159     def toxml(self):
 160         """
 161         convert this rspec to an xml string and return it.
 162         """
 163         return self.rootNode.toxml()
 164
 165
 166     def toprettyxml(self):
 167         """
 168         print this rspec in xml in a pretty format.
 169         """
 170         return self.rootNode.toprettyxml()
 171
 172
 173     def parseFile(self, filename):
 174         """
 175         read a local xml file and store it as a dom object.
 176         """
 177         dom = minidom.parse(filename)
 178         self.rootNode = dom.childNodes[0]
 179
 180
 181     def parseString(self, xml):
 182         """
 183         read an xml string and store it as a dom object.
 184         """
 185         xml = xml.replace('\n', '').replace('\t', '').strip()
 186         dom = minidom.parseString(xml)
 187         self.rootNode = dom.childNodes[0]
 188
 189
 190     def _httpGetXSD(self, xsdURI):
 191         # split the URI into relevant parts
 192         host = xsdURI.split("/")[2]
 193         if xsdURI.startswith("https"):
 194             conn = httplib.HTTPSConnection(host,
 195                 httplib.HTTPSConnection.default_port)
 196         elif xsdURI.startswith("http"):
 197             conn = httplib.HTTPConnection(host,
 198                 httplib.HTTPConnection.default_port)
 199         conn.request("GET", xsdURI)
 200         # If we can't download the schema, raise an exception
 201         r1 = conn.getresponse()
 202         if r1.status != 200:
 203             raise Exception
 204         return r1.read().replace('\n', '').replace('\t', '').strip()
 205
 206
 207     def _parseXSD(self, xsdURI):
 208         """
 209         Download XSD from URL, or if file, read local xsd file and set schemaDict
 210         """
 211         # Since the schema definiton is a global namespace shared by and agreed upon by
 212         # others, this should probably be a URL.  Check for URL, download xsd, parse, or
 213         # if local file, use local file.
 214         schemaDom = None
 215         if xsdURI.startswith("http"):
 216             try:
 217                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 218             except Exception, e:
 219                 # logging.debug("%s: web file not found" % xsdURI)
 220                 # logging.debug("Using local file %s" % self.xsd")
 221                 print e
 222                 print "Can't find %s on the web. Continuing." % xsdURI
 223         if not schemaDom:
 224             if os.path.exists(xsdURI):
 225                 # logging.debug("using local copy.")
 226                 print "Using local %s" % xsdURI
 227                 schemaDom = minidom.parse(xsdURI)
 228             else:
 229                 raise Exception("Can't find xsd locally")
 230         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 231
 232
 233     def dict2dom(self, rdict, include_doc = False):
 234         """
 235         convert a dict object into a dom object.
 236         """
 237
 238         def elementNode(tagname, rd):
 239             element = minidom.Element(tagname)
 240             for key in rd.keys():
 241                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 242                     element.setAttribute(key, str(rd[key]))
 243                 elif isinstance(rd[key], dict):
 244                     child = elementNode(key, rd[key])
 245                     element.appendChild(child)
 246                 elif isinstance(rd[key], list):
 247                     for item in rd[key]:
 248                         if isinstance(item, dict):
 249                             child = elementNode(key, item)
 250                             element.appendChild(child)
 251                         elif isinstance(item, StringTypes) or isinstance(item, int):
 252                             child = minidom.Element(key)
 253                             text = minidom.Text()
 254                             text.data = item
 255                             child.appendChild(text)
 256                             element.appendChild(child)
 257             return element
 258
 259         # Minidom does not allow documents to have more then one
 260         # child, but elements may have many children. Because of
 261         # this, the document's root node will be the first key/value
 262         # pair in the dictionary.
 263         node = elementNode(rdict.keys()[0], rdict.values()[0])
 264         if include_doc:
 265             rootNode = minidom.Document()
 266             rootNode.appendChild(node)
 267         else:
 268             rootNode = node
 269         return rootNode
 270
 271
 272     def parseDict(self, rdict, include_doc = True):
 273         """
 274         Convert a dictionary into a dom object and store it.
 275         """
 276         self.rootNode = self.dict2dom(rdict, include_doc)
 277
 278
 279     def getDictsByTagName(self, tagname, dom = None):
 280         """
 281         Search the dom for all elements with the specified tagname
 282         and return them as a list of dicts
 283         """
 284         if not dom:
 285             dom = self.rootNode
 286         dicts = []
 287         doms = dom.getElementsByTagName(tagname)
 288         dictlist = [self.toDict(d) for d in doms]
 289         for item in dictlist:
 290             for value in item.values():
 291                 dicts.append(value)
 292         return dicts
 293
 294     def getDictByTagNameValue(self, tagname, value, dom = None):
 295         """
 296         Search the dom for the first element with the specified tagname
 297         and value and return it as a dict.
 298         """
 299         tempdict = {}
 300         if not dom:
 301             dom = self.rootNode
 302         dicts = self.getDictsByTagName(tagname, dom)
 303
 304         for rdict in dicts:
 305             if rdict.has_key('name') and rdict['name'] in [value]:
 306                 return rdict
 307
 308         return tempdict
 309
 310
 311     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 312         """
 313         Removes all elements where:
 314         1. tagname matches the element tag
 315         2. attribute matches the element attribte
 316         3. attribute value is in valuelist
 317         """
 318
 319         tempdict = {}
 320         if not dom:
 321             dom = self.rootNode
 322
 323         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 324             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 325                 dom.parentNode.removeChild(dom)
 326             if blacklist and dom.attributes.get(attribute).value in blacklist:
 327                 dom.parentNode.removeChild(dom)
 328
 329         if dom.hasChildNodes():
 330             for child in dom.childNodes:
 331                 self.filter(tagname, attribute, blacklist, whitelist, child)
 332
 333
 334     def validateDicts(self):
 335         types = {
 336             'EInt' : int,
 337             'EString' : str,
 338             'EByteArray' : list,
 339             'EBoolean' : bool,
 340             'EFloat' : float,
 341             'EDate' : date}
 342
 343
 344     def pprint(self, r = None, depth = 0):
 345         """
 346         Pretty print the dict
 347         """
 348         line = ""
 349         if r == None: r = self.dict
 350         # Set the dept
 351         for tab in range(0,depth): line += "    "
 352         # check if it's nested
 353         if type(r) == dict:
 354             for i in r.keys():
 355                 print line + "%s:" % i
 356                 self.pprint(r[i], depth + 1)
 357         elif type(r) in (tuple, list):
 358             for j in r: self.pprint(j, depth + 1)
 359         # not nested so just print.
 360         else:
 361             print line + "%s" %  r
 362
 363
 364
 365 class RecordSpec(Rspec):
 366
 367     root_tag = 'record'
 368     def parseDict(self, rdict, include_doc = False):
 369         """
 370         Convert a dictionary into a dom object and store it.
 371         """
 372         self.rootNode = self.dict2dom(rdict, include_doc)
 373
 374     def dict2dom(self, rdict, include_doc = False):
 375         record_dict = rdict
 376         if not len(rdict.keys()) == 1:
 377             record_dict = {self.root_tag : rdict}
 378         return Rspec.dict2dom(self, record_dict, include_doc)
 379
 380
 381 # vim:ts=4:expandtab
 382