sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10 from lxml import etree
  11 from StringIO import StringIO
  12
  13
  14 class RSpec:
  15
  16     def __init__(self, xml = None, xsd = None, NSURL = None):
  17         '''
  18         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  19         and reads python dicts and writes rspec xml
  20
  21         self.xsd = # Schema.  Can be local or remote file.
  22         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  23         self.rootNode = # root of the DOM
  24         self.dict = # dict of the RSpec.
  25         self.schemaDict = {} # dict of the Schema
  26         '''
  27
  28         self.xsd = xsd
  29         self.rootNode = None
  30         self.dict = {}
  31         self.schemaDict = {}
  32         self.NSURL = NSURL
  33         if xml:
  34             if type(xml) == file:
  35                 self.parseFile(xml)
  36             if type(xml) in StringTypes:
  37                 self.parseString(xml)
  38             self.dict = self.toDict()
  39         if xsd:
  40             self._parseXSD(self.NSURL + self.xsd)
  41
  42
  43     def _getText(self, nodelist):
  44         rc = ""
  45         for node in nodelist:
  46             if node.nodeType == node.TEXT_NODE:
  47                 rc = rc + node.data
  48         return rc
  49
  50     # The rspec is comprised of 2 parts, and 1 reference:
  51     # attributes/elements describe individual resources
  52     # complexTypes are used to describe a set of attributes/elements
  53     # complexTypes can include a reference to other complexTypes.
  54
  55
  56     def _getName(self, node):
  57         '''Gets name of node. If tag has no name, then return tag's localName'''
  58         name = None
  59         if not node.nodeName.startswith("#"):
  60             if node.localName:
  61                 name = node.localName
  62             elif node.attributes.has_key("name"):
  63                 name = node.attributes.get("name").value
  64         return name
  65
  66
    # Attributes of a node are parsed into {attribute_name: attribute_value, ...}
  68     def _attributeDict(self, attributeDom):
  69         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  70         node = {} # parsed dict
  71         for attr in attributeDom.attributes.keys():
  72             node[attr] = attributeDom.attributes.get(attr).value
  73         return node
  74
  75
  76     def appendToDictOrCreate(self, dict, key, value):
  77         if (dict.has_key(key)):
  78             dict[key].append(value)
  79         else:
  80             dict[key]=[value]
  81         return dict
  82
  83     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  84         """
  85         convert an XML to a nested dict:
  86           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  87           * Terminal nodes (the rest) are nested dictionaries
  88         """
  89
  90         if (not nodeDom):
  91             nodeDom=self.rootNode
  92
  93         curNodeName = nodeDom.localName
  94
  95         if (nodeDom.hasChildNodes()):
  96             childdict={}
  97             for attribute in nodeDom.attributes.keys():
  98                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
  99             for child in nodeDom.childNodes[:-1]:
 100                 if (child.nodeValue):
 101                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 102                 else:
 103                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 104
 105             child = nodeDom.childNodes[-1]
 106             if (child.nodeValue):
 107                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 108                 if (childdict):
 109                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 110             else:
 111                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 112         else:
 113             childdict={}
 114             for attribute in nodeDom.attributes.keys():
 115                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 116
 117             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 118
 119         if (parentdict is not None):
 120             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 121             return parentdict
 122         else:
 123             return siblingdict
 124
 125
 126
 127     def toDict(self, nodeDom = None):
 128         """
 129         convert this rspec to a dict and return it.
 130         """
 131         node = {}
 132         if not nodeDom:
 133              nodeDom = self.rootNode
 134
 135         elementName = nodeDom.nodeName
 136         if elementName and not elementName.startswith("#"):
 137             # attributes have tags and values.  get {tag: value}, else {type: value}
 138             node[elementName] = self._attributeDict(nodeDom)
 139             # resolve the child nodes.
 140             if nodeDom.hasChildNodes():
 141                 for child in nodeDom.childNodes:
 142                     childName = self._getName(child)
 143
 144                     # skip null children
 145                     if not childName: continue
 146
 147                     # initialize the possible array of children
 148                     if not node[elementName].has_key(childName): node[elementName][childName] = []
 149
 150                     if isinstance(child, minidom.Text):
 151                         # add if data is not empty
 152                         if child.data.strip():
 153                             node[elementName][childName].append(nextchild.data)
 154                     elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 155                         for nextchild in child.childNodes:
 156                             node[elementName][childName].append(nextchild.data)
 157                     else:
 158                         childdict = self.toDict(child)
 159                         for value in childdict.values():
 160                             node[elementName][childName].append(value)
 161
 162         return node
 163
 164
 165     def toxml(self):
 166         """
 167         convert this rspec to an xml string and return it.
 168         """
 169         return self.rootNode.toxml()
 170
 171
 172     def toprettyxml(self):
 173         """
 174         print this rspec in xml in a pretty format.
 175         """
 176         return self.rootNode.toprettyxml()
 177
 178
 179     def __removeWhitespaceNodes(self, parent):
 180         for child in list(parent.childNodes):
 181             if child.nodeType == minidom.Node.TEXT_NODE and child.data.strip() == '':
 182                 parent.removeChild(child)
 183             else:
 184                 self.__removeWhitespaceNodes(child)
 185
 186     def parseFile(self, filename):
 187         """
 188         read a local xml file and store it as a dom object.
 189         """
 190         dom = minidom.parse(filename)
 191         self.__removeWhitespaceNodes(dom)
 192         self.rootNode = dom.childNodes[0]
 193
 194
 195     def parseString(self, xml):
 196         """
 197         read an xml string and store it as a dom object.
 198         """
 199         dom = minidom.parseString(xml)
 200         self.__removeWhitespaceNodes(dom)
 201         self.rootNode = dom.childNodes[0]
 202
 203
 204     def _httpGetXSD(self, xsdURI):
 205         # split the URI into relevant parts
 206         host = xsdURI.split("/")[2]
 207         if xsdURI.startswith("https"):
 208             conn = httplib.HTTPSConnection(host,
 209                 httplib.HTTPSConnection.default_port)
 210         elif xsdURI.startswith("http"):
 211             conn = httplib.HTTPConnection(host,
 212                 httplib.HTTPConnection.default_port)
 213         conn.request("GET", xsdURI)
 214         # If we can't download the schema, raise an exception
 215         r1 = conn.getresponse()
 216         if r1.status != 200:
 217             raise Exception
 218         return r1.read().replace('\n', '').replace('\t', '').strip()
 219
 220
 221     def _parseXSD(self, xsdURI):
 222         """
 223         Download XSD from URL, or if file, read local xsd file and set
 224         schemaDict.
 225
 226         Since the schema definiton is a global namespace shared by and
 227         agreed upon by others, this should probably be a URL.  Check
 228         for URL, download xsd, parse, or if local file, use that.
 229         """
 230         schemaDom = None
 231         if xsdURI.startswith("http"):
 232             try:
 233                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 234             except Exception, e:
 235                 # logging.debug("%s: web file not found" % xsdURI)
 236                 # logging.debug("Using local file %s" % self.xsd")
 237                 print e
 238                 print "Can't find %s on the web. Continuing." % xsdURI
 239         if not schemaDom:
 240             if os.path.exists(xsdURI):
 241                 # logging.debug("using local copy.")
 242                 print "Using local %s" % xsdURI
 243                 schemaDom = minidom.parse(xsdURI)
 244             else:
 245                 raise Exception("Can't find xsd locally")
 246         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 247
 248
 249     def dict2dom(self, rdict, include_doc = False):
 250         """
 251         convert a dict object into a dom object.
 252         """
 253
 254         def elementNode(tagname, rd):
 255             element = minidom.Element(tagname)
 256             for key in rd.keys():
 257                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 258                     element.setAttribute(key, str(rd[key]))
 259                 elif isinstance(rd[key], dict):
 260                     child = elementNode(key, rd[key])
 261                     element.appendChild(child)
 262                 elif isinstance(rd[key], list):
 263                     for item in rd[key]:
 264                         if isinstance(item, dict):
 265                             child = elementNode(key, item)
 266                             element.appendChild(child)
 267                         elif isinstance(item, StringTypes) or isinstance(item, int):
 268                             child = minidom.Element(key)
 269                             text = minidom.Text()
 270                             text.data = item
 271                             child.appendChild(text)
 272                             element.appendChild(child)
 273             return element
 274
 275         # Minidom does not allow documents to have more then one
 276         # child, but elements may have many children. Because of
 277         # this, the document's root node will be the first key/value
 278         # pair in the dictionary.
 279         node = elementNode(rdict.keys()[0], rdict.values()[0])
 280         if include_doc:
 281             rootNode = minidom.Document()
 282             rootNode.appendChild(node)
 283         else:
 284             rootNode = node
 285         return rootNode
 286
 287
 288     def parseDict(self, rdict, include_doc = True):
 289         """
 290         Convert a dictionary into a dom object and store it.
 291         """
 292         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 293
 294
 295     def getDictsByTagName(self, tagname, dom = None):
 296         """
 297         Search the dom for all elements with the specified tagname
 298         and return them as a list of dicts
 299         """
 300         if not dom:
 301             dom = self.rootNode
 302         dicts = []
 303         doms = dom.getElementsByTagName(tagname)
 304         dictlist = [self.toDict(d) for d in doms]
 305         for item in dictlist:
 306             for value in item.values():
 307                 dicts.append(value)
 308         return dicts
 309
 310     def getDictByTagNameValue(self, tagname, value, dom = None):
 311         """
 312         Search the dom for the first element with the specified tagname
 313         and value and return it as a dict.
 314         """
 315         tempdict = {}
 316         if not dom:
 317             dom = self.rootNode
 318         dicts = self.getDictsByTagName(tagname, dom)
 319
 320         for rdict in dicts:
 321             if rdict.has_key('name') and rdict['name'] in [value]:
 322                 return rdict
 323
 324         return tempdict
 325
 326
 327     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 328         """
 329         Removes all elements where:
 330         1. tagname matches the element tag
 331         2. attribute matches the element attribte
 332         3. attribute value is in valuelist
 333         """
 334
 335         tempdict = {}
 336         if not dom:
 337             dom = self.rootNode
 338
 339         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 340             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 341                 dom.parentNode.removeChild(dom)
 342             if blacklist and dom.attributes.get(attribute).value in blacklist:
 343                 dom.parentNode.removeChild(dom)
 344
 345         if dom.hasChildNodes():
 346             for child in dom.childNodes:
 347                 self.filter(tagname, attribute, blacklist, whitelist, child)
 348
 349
 350     def merge(self, rspecs, tagname, dom=None):
 351         """
 352         Merge this rspec with the requested rspec based on the specified
 353         starting tag name. The start tag (and all of its children) will be merged
 354         """
 355         tempdict = {}
 356         if not dom:
 357             dom = self.rootNode
 358
 359         whitelist = []
 360         blacklist = []
 361
 362         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 363             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 364                 dom.parentNode.removeChild(dom)
 365             if blacklist and dom.attributes.get(attribute).value in blacklist:
 366                 dom.parentNode.removeChild(dom)
 367
 368         if dom.hasChildNodes():
 369             for child in dom.childNodes:
 370                 self.filter(tagname, attribute, blacklist, whitelist, child)
 371
 372     def validateDicts(self):
 373         types = {
 374             'EInt' : int,
 375             'EString' : str,
 376             'EByteArray' : list,
 377             'EBoolean' : bool,
 378             'EFloat' : float,
 379             'EDate' : date}
 380
 381
 382     def pprint(self, r = None, depth = 0):
 383         """
 384         Pretty print the dict
 385         """
 386         line = ""
 387         if r == None: r = self.dict
 388         # Set the dept
 389         for tab in range(0,depth): line += "    "
 390         # check if it's nested
 391         if type(r) == dict:
 392             for i in r.keys():
 393                 print line + "%s:" % i
 394                 self.pprint(r[i], depth + 1)
 395         elif type(r) in (tuple, list):
 396             for j in r: self.pprint(j, depth + 1)
 397         # not nested so just print.
 398         else:
 399             print line + "%s" %  r
 400
 401
 402
 403 class RecordSpec(RSpec):
 404
 405     root_tag = 'record'
 406     def parseDict(self, rdict, include_doc = False):
 407         """
 408         Convert a dictionary into a dom object and store it.
 409         """
 410         self.rootNode = self.dict2dom(rdict, include_doc)
 411
 412     def dict2dom(self, rdict, include_doc = False):
 413         record_dict = rdict
 414         if not len(rdict.keys()) == 1:
 415             record_dict = {self.root_tag : rdict}
 416         return RSpec.dict2dom(self, record_dict, include_doc)
 417
 418
 419 # vim:ts=4:expandtab
 420