sfa/util/rspec.py

   1 import sys
   2 import pprint
   3 import os
   4 from StringIO import StringIO
   5 from types import StringTypes, ListType
   6 import httplib
   7 from xml.dom import minidom
   8 from lxml import etree
   9
  10 from sfa.util.sfalogging import info_logger
  11
  12 class RSpec:
  13
  14     def __init__(self, xml = None, xsd = None, NSURL = None):
  15         '''
  16         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  17         and reads python dicts and writes rspec xml
  18
  19         self.xsd = # Schema.  Can be local or remote file.
  20         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  21         self.rootNode = # root of the DOM
  22         self.dict = # dict of the RSpec.
  23         self.schemaDict = {} # dict of the Schema
  24         '''
  25
  26         self.xsd = xsd
  27         self.rootNode = None
  28         self.dict = {}
  29         self.schemaDict = {}
  30         self.NSURL = NSURL
  31         if xml:
  32             if type(xml) == file:
  33                 self.parseFile(xml)
  34             if type(xml) in StringTypes:
  35                 self.parseString(xml)
  36             self.dict = self.toDict()
  37         if xsd:
  38             self._parseXSD(self.NSURL + self.xsd)
  39
  40
  41     def _getText(self, nodelist):
  42         rc = ""
  43         for node in nodelist:
  44             if node.nodeType == node.TEXT_NODE:
  45                 rc = rc + node.data
  46         return rc
  47
  48     # The rspec is comprised of 2 parts, and 1 reference:
  49     # attributes/elements describe individual resources
  50     # complexTypes are used to describe a set of attributes/elements
  51     # complexTypes can include a reference to other complexTypes.
  52
  53
  54     def _getName(self, node):
  55         '''Gets name of node. If tag has no name, then return tag's localName'''
  56         name = None
  57         if not node.nodeName.startswith("#"):
  58             if node.localName:
  59                 name = node.localName
  60             elif node.attributes.has_key("name"):
  61                 name = node.attributes.get("name").value
  62         return name
  63
  64
  65     # Attribute.  {name : nameofattribute, {items: values})
  66     def _attributeDict(self, attributeDom):
  67         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  68         node = {} # parsed dict
  69         for attr in attributeDom.attributes.keys():
  70             node[attr] = attributeDom.attributes.get(attr).value
  71         return node
  72
  73
  74     def appendToDictOrCreate(self, dict, key, value):
  75         if (dict.has_key(key)):
  76             dict[key].append(value)
  77         else:
  78             dict[key]=[value]
  79         return dict
  80
  81     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  82         """
  83         convert an XML to a nested dict:
  84           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  85           * Terminal nodes (the rest) are nested dictionaries
  86         """
  87
  88         if (not nodeDom):
  89             nodeDom=self.rootNode
  90
  91         curNodeName = nodeDom.localName
  92
  93         if (nodeDom.hasChildNodes()):
  94             childdict={}
  95             for attribute in nodeDom.attributes.keys():
  96                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
  97             for child in nodeDom.childNodes[:-1]:
  98                 if (child.nodeValue):
  99                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 100                 else:
 101                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 102
 103             child = nodeDom.childNodes[-1]
 104             if (child.nodeValue):
 105                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 106                 if (childdict):
 107                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 108             else:
 109                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 110         else:
 111             childdict={}
 112             for attribute in nodeDom.attributes.keys():
 113                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 114
 115             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 116
 117         if (parentdict is not None):
 118             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 119             return parentdict
 120         else:
 121             return siblingdict
 122
 123
 124
 125     def toDict(self, nodeDom = None):
 126         """
 127         convert this rspec to a dict and return it.
 128         """
 129         node = {}
 130         if not nodeDom:
 131              nodeDom = self.rootNode
 132
 133         elementName = nodeDom.nodeName
 134         if elementName and not elementName.startswith("#"):
 135             # attributes have tags and values.  get {tag: value}, else {type: value}
 136             node[elementName] = self._attributeDict(nodeDom)
 137             # resolve the child nodes.
 138             if nodeDom.hasChildNodes():
 139                 for child in nodeDom.childNodes:
 140                     childName = self._getName(child)
 141
 142                     # skip null children
 143                     if not childName: continue
 144
 145                     # initialize the possible array of children
 146                     if not node[elementName].has_key(childName): node[elementName][childName] = []
 147
 148                     if isinstance(child, minidom.Text):
 149                         # add if data is not empty
 150                         if child.data.strip():
 151                             node[elementName][childName].append(nextchild.data)
 152                     elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 153                         for nextchild in child.childNodes:
 154                             node[elementName][childName].append(nextchild.data)
 155                     else:
 156                         childdict = self.toDict(child)
 157                         for value in childdict.values():
 158                             node[elementName][childName].append(value)
 159
 160         return node
 161
 162
 163     def toxml(self):
 164         """
 165         convert this rspec to an xml string and return it.
 166         """
 167         return self.rootNode.toxml()
 168
 169
 170     def toprettyxml(self):
 171         """
 172         print this rspec in xml in a pretty format.
 173         """
 174         return self.rootNode.toprettyxml()
 175
 176
 177     def __removeWhitespaceNodes(self, parent):
 178         for child in list(parent.childNodes):
 179             if child.nodeType == minidom.Node.TEXT_NODE and child.data.strip() == '':
 180                 parent.removeChild(child)
 181             else:
 182                 self.__removeWhitespaceNodes(child)
 183
 184     def parseFile(self, filename):
 185         """
 186         read a local xml file and store it as a dom object.
 187         """
 188         dom = minidom.parse(filename)
 189         self.__removeWhitespaceNodes(dom)
 190         self.rootNode = dom.childNodes[0]
 191
 192
 193     def parseString(self, xml):
 194         """
 195         read an xml string and store it as a dom object.
 196         """
 197         dom = minidom.parseString(xml)
 198         self.__removeWhitespaceNodes(dom)
 199         self.rootNode = dom.childNodes[0]
 200
 201
 202     def _httpGetXSD(self, xsdURI):
 203         # split the URI into relevant parts
 204         host = xsdURI.split("/")[2]
 205         if xsdURI.startswith("https"):
 206             conn = httplib.HTTPSConnection(host,
 207                 httplib.HTTPSConnection.default_port)
 208         elif xsdURI.startswith("http"):
 209             conn = httplib.HTTPConnection(host,
 210                 httplib.HTTPConnection.default_port)
 211         conn.request("GET", xsdURI)
 212         # If we can't download the schema, raise an exception
 213         r1 = conn.getresponse()
 214         if r1.status != 200:
 215             raise Exception
 216         return r1.read().replace('\n', '').replace('\t', '').strip()
 217
 218
 219     def _parseXSD(self, xsdURI):
 220         """
 221         Download XSD from URL, or if file, read local xsd file and set
 222         schemaDict.
 223
 224         Since the schema definiton is a global namespace shared by and
 225         agreed upon by others, this should probably be a URL.  Check
 226         for URL, download xsd, parse, or if local file, use that.
 227         """
 228         schemaDom = None
 229         if xsdURI.startswith("http"):
 230             try:
 231                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 232             except Exception, e:
 233                 # logging.debug("%s: web file not found" % xsdURI)
 234                 # logging.debug("Using local file %s" % self.xsd")
 235                 info_logger.log_exc("rspec.parseXSD: can't find %s on the web. Continuing." % xsdURI)
 236         if not schemaDom:
 237             if os.path.exists(xsdURI):
 238                 # logging.debug("using local copy.")
 239                 info_logger.debug("rspec.parseXSD: Using local %s" % xsdURI)
 240                 schemaDom = minidom.parse(xsdURI)
 241             else:
 242                 raise Exception("rspec.parseXSD: can't find xsd locally")
 243         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 244
 245
 246     def dict2dom(self, rdict, include_doc = False):
 247         """
 248         convert a dict object into a dom object.
 249         """
 250
 251         def elementNode(tagname, rd):
 252             element = minidom.Element(tagname)
 253             for key in rd.keys():
 254                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 255                     element.setAttribute(key, unicode(rd[key]))
 256                 elif isinstance(rd[key], dict):
 257                     child = elementNode(key, rd[key])
 258                     element.appendChild(child)
 259                 elif isinstance(rd[key], list):
 260                     for item in rd[key]:
 261                         if isinstance(item, dict):
 262                             child = elementNode(key, item)
 263                             element.appendChild(child)
 264                         elif isinstance(item, StringTypes) or isinstance(item, int):
 265                             child = minidom.Element(key)
 266                             text = minidom.Text()
 267                             text.data = item
 268                             child.appendChild(text)
 269                             element.appendChild(child)
 270             return element
 271
 272         # Minidom does not allow documents to have more then one
 273         # child, but elements may have many children. Because of
 274         # this, the document's root node will be the first key/value
 275         # pair in the dictionary.
 276         node = elementNode(rdict.keys()[0], rdict.values()[0])
 277         if include_doc:
 278             rootNode = minidom.Document()
 279             rootNode.appendChild(node)
 280         else:
 281             rootNode = node
 282         return rootNode
 283
 284
 285     def parseDict(self, rdict, include_doc = True):
 286         """
 287         Convert a dictionary into a dom object and store it.
 288         """
 289         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 290
 291
 292     def getDictsByTagName(self, tagname, dom = None):
 293         """
 294         Search the dom for all elements with the specified tagname
 295         and return them as a list of dicts
 296         """
 297         if not dom:
 298             dom = self.rootNode
 299         dicts = []
 300         doms = dom.getElementsByTagName(tagname)
 301         dictlist = [self.toDict(d) for d in doms]
 302         for item in dictlist:
 303             for value in item.values():
 304                 dicts.append(value)
 305         return dicts
 306
 307     def getDictByTagNameValue(self, tagname, value, dom = None):
 308         """
 309         Search the dom for the first element with the specified tagname
 310         and value and return it as a dict.
 311         """
 312         tempdict = {}
 313         if not dom:
 314             dom = self.rootNode
 315         dicts = self.getDictsByTagName(tagname, dom)
 316
 317         for rdict in dicts:
 318             if rdict.has_key('name') and rdict['name'] in [value]:
 319                 return rdict
 320
 321         return tempdict
 322
 323
 324     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 325         """
 326         Removes all elements where:
 327         1. tagname matches the element tag
 328         2. attribute matches the element attribte
 329         3. attribute value is in valuelist
 330         """
 331
 332         tempdict = {}
 333         if not dom:
 334             dom = self.rootNode
 335
 336         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 337             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 338                 dom.parentNode.removeChild(dom)
 339             if blacklist and dom.attributes.get(attribute).value in blacklist:
 340                 dom.parentNode.removeChild(dom)
 341
 342         if dom.hasChildNodes():
 343             for child in dom.childNodes:
 344                 self.filter(tagname, attribute, blacklist, whitelist, child)
 345
 346
 347     def merge(self, rspecs, tagname, dom=None):
 348         """
 349         Merge this rspec with the requested rspec based on the specified
 350         starting tag name. The start tag (and all of its children) will be merged
 351         """
 352         tempdict = {}
 353         if not dom:
 354             dom = self.rootNode
 355
 356         whitelist = []
 357         blacklist = []
 358
 359         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 360             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 361                 dom.parentNode.removeChild(dom)
 362             if blacklist and dom.attributes.get(attribute).value in blacklist:
 363                 dom.parentNode.removeChild(dom)
 364
 365         if dom.hasChildNodes():
 366             for child in dom.childNodes:
 367                 self.filter(tagname, attribute, blacklist, whitelist, child)
 368
 369     def validateDicts(self):
 370         types = {
 371             'EInt' : int,
 372             'EString' : str,
 373             'EByteArray' : list,
 374             'EBoolean' : bool,
 375             'EFloat' : float,
 376             'EDate' : date}
 377
 378
 379     def pprint(self, r = None, depth = 0):
 380         """
 381         Pretty print the dict
 382         """
 383         line = ""
 384         if r == None: r = self.dict
 385         # Set the dept
 386         for tab in range(0,depth): line += "    "
 387         # check if it's nested
 388         if type(r) == dict:
 389             for i in r.keys():
 390                 print line + "%s:" % i
 391                 self.pprint(r[i], depth + 1)
 392         elif type(r) in (tuple, list):
 393             for j in r: self.pprint(j, depth + 1)
 394         # not nested so just print.
 395         else:
 396             print line + "%s" %  r
 397
 398
 399
 400 class RecordSpec(RSpec):
 401
 402     root_tag = 'record'
 403     def parseDict(self, rdict, include_doc = False):
 404         """
 405         Convert a dictionary into a dom object and store it.
 406         """
 407         self.rootNode = self.dict2dom(rdict, include_doc)
 408
 409     def dict2dom(self, rdict, include_doc = False):
 410         record_dict = rdict
 411         if not len(rdict.keys()) == 1:
 412             record_dict = {self.root_tag : rdict}
 413         return RSpec.dict2dom(self, record_dict, include_doc)
 414
 415
 416 # vim:ts=4:expandtab
 417