sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10 from lxml import etree
  11 from StringIO import StringIO
  12
  13 from sfa.util.sfalogging import sfa_logger
  14
  15 class RSpec:
  16
  17     def __init__(self, xml = None, xsd = None, NSURL = None):
  18         '''
  19         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  20         and reads python dicts and writes rspec xml
  21
  22         self.xsd = # Schema.  Can be local or remote file.
  23         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  24         self.rootNode = # root of the DOM
  25         self.dict = # dict of the RSpec.
  26         self.schemaDict = {} # dict of the Schema
  27         '''
  28
  29         self.xsd = xsd
  30         self.rootNode = None
  31         self.dict = {}
  32         self.schemaDict = {}
  33         self.NSURL = NSURL
  34         if xml:
  35             if type(xml) == file:
  36                 self.parseFile(xml)
  37             if type(xml) in StringTypes:
  38                 self.parseString(xml)
  39             self.dict = self.toDict()
  40         if xsd:
  41             self._parseXSD(self.NSURL + self.xsd)
  42
  43
  44     def _getText(self, nodelist):
  45         rc = ""
  46         for node in nodelist:
  47             if node.nodeType == node.TEXT_NODE:
  48                 rc = rc + node.data
  49         return rc
  50
  51     # The rspec is comprised of 2 parts, and 1 reference:
  52     # attributes/elements describe individual resources
  53     # complexTypes are used to describe a set of attributes/elements
  54     # complexTypes can include a reference to other complexTypes.
  55
  56
  57     def _getName(self, node):
  58         '''Gets name of node. If tag has no name, then return tag's localName'''
  59         name = None
  60         if not node.nodeName.startswith("#"):
  61             if node.localName:
  62                 name = node.localName
  63             elif node.attributes.has_key("name"):
  64                 name = node.attributes.get("name").value
  65         return name
  66
  67
  68     # Attribute.  {name : nameofattribute, {items: values})
  69     def _attributeDict(self, attributeDom):
  70         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  71         node = {} # parsed dict
  72         for attr in attributeDom.attributes.keys():
  73             node[attr] = attributeDom.attributes.get(attr).value
  74         return node
  75
  76
  77     def appendToDictOrCreate(self, dict, key, value):
  78         if (dict.has_key(key)):
  79             dict[key].append(value)
  80         else:
  81             dict[key]=[value]
  82         return dict
  83
  84     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  85         """
  86         convert an XML to a nested dict:
  87           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  88           * Terminal nodes (the rest) are nested dictionaries
  89         """
  90
  91         if (not nodeDom):
  92             nodeDom=self.rootNode
  93
  94         curNodeName = nodeDom.localName
  95
  96         if (nodeDom.hasChildNodes()):
  97             childdict={}
  98             for attribute in nodeDom.attributes.keys():
  99                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 100             for child in nodeDom.childNodes[:-1]:
 101                 if (child.nodeValue):
 102                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 103                 else:
 104                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 105
 106             child = nodeDom.childNodes[-1]
 107             if (child.nodeValue):
 108                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 109                 if (childdict):
 110                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 111             else:
 112                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 113         else:
 114             childdict={}
 115             for attribute in nodeDom.attributes.keys():
 116                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 117
 118             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 119
 120         if (parentdict is not None):
 121             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 122             return parentdict
 123         else:
 124             return siblingdict
 125
 126
 127
 128     def toDict(self, nodeDom = None):
 129         """
 130         convert this rspec to a dict and return it.
 131         """
 132         node = {}
 133         if not nodeDom:
 134              nodeDom = self.rootNode
 135
 136         elementName = nodeDom.nodeName
 137         if elementName and not elementName.startswith("#"):
 138             # attributes have tags and values.  get {tag: value}, else {type: value}
 139             node[elementName] = self._attributeDict(nodeDom)
 140             # resolve the child nodes.
 141             if nodeDom.hasChildNodes():
 142                 for child in nodeDom.childNodes:
 143                     childName = self._getName(child)
 144
 145                     # skip null children
 146                     if not childName: continue
 147
 148                     # initialize the possible array of children
 149                     if not node[elementName].has_key(childName): node[elementName][childName] = []
 150
 151                     if isinstance(child, minidom.Text):
 152                         # add if data is not empty
 153                         if child.data.strip():
 154                             node[elementName][childName].append(nextchild.data)
 155                     elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 156                         for nextchild in child.childNodes:
 157                             node[elementName][childName].append(nextchild.data)
 158                     else:
 159                         childdict = self.toDict(child)
 160                         for value in childdict.values():
 161                             node[elementName][childName].append(value)
 162
 163         return node
 164
 165
 166     def toxml(self):
 167         """
 168         convert this rspec to an xml string and return it.
 169         """
 170         return self.rootNode.toxml()
 171
 172
 173     def toprettyxml(self):
 174         """
 175         print this rspec in xml in a pretty format.
 176         """
 177         return self.rootNode.toprettyxml()
 178
 179
 180     def __removeWhitespaceNodes(self, parent):
 181         for child in list(parent.childNodes):
 182             if child.nodeType == minidom.Node.TEXT_NODE and child.data.strip() == '':
 183                 parent.removeChild(child)
 184             else:
 185                 self.__removeWhitespaceNodes(child)
 186
 187     def parseFile(self, filename):
 188         """
 189         read a local xml file and store it as a dom object.
 190         """
 191         dom = minidom.parse(filename)
 192         self.__removeWhitespaceNodes(dom)
 193         self.rootNode = dom.childNodes[0]
 194
 195
 196     def parseString(self, xml):
 197         """
 198         read an xml string and store it as a dom object.
 199         """
 200         dom = minidom.parseString(xml)
 201         self.__removeWhitespaceNodes(dom)
 202         self.rootNode = dom.childNodes[0]
 203
 204
 205     def _httpGetXSD(self, xsdURI):
 206         # split the URI into relevant parts
 207         host = xsdURI.split("/")[2]
 208         if xsdURI.startswith("https"):
 209             conn = httplib.HTTPSConnection(host,
 210                 httplib.HTTPSConnection.default_port)
 211         elif xsdURI.startswith("http"):
 212             conn = httplib.HTTPConnection(host,
 213                 httplib.HTTPConnection.default_port)
 214         conn.request("GET", xsdURI)
 215         # If we can't download the schema, raise an exception
 216         r1 = conn.getresponse()
 217         if r1.status != 200:
 218             raise Exception
 219         return r1.read().replace('\n', '').replace('\t', '').strip()
 220
 221
 222     def _parseXSD(self, xsdURI):
 223         """
 224         Download XSD from URL, or if file, read local xsd file and set
 225         schemaDict.
 226
 227         Since the schema definiton is a global namespace shared by and
 228         agreed upon by others, this should probably be a URL.  Check
 229         for URL, download xsd, parse, or if local file, use that.
 230         """
 231         schemaDom = None
 232         if xsdURI.startswith("http"):
 233             try:
 234                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 235             except Exception, e:
 236                 # logging.debug("%s: web file not found" % xsdURI)
 237                 # logging.debug("Using local file %s" % self.xsd")
 238                 sfa_logger().log_exc("rspec.parseXSD: can't find %s on the web. Continuing." % xsdURI)
 239         if not schemaDom:
 240             if os.path.exists(xsdURI):
 241                 # logging.debug("using local copy.")
 242                 sfa_logger().debug("rspec.parseXSD: Using local %s" % xsdURI)
 243                 schemaDom = minidom.parse(xsdURI)
 244             else:
 245                 raise Exception("rspec.parseXSD: can't find xsd locally")
 246         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 247
 248
 249     def dict2dom(self, rdict, include_doc = False):
 250         """
 251         convert a dict object into a dom object.
 252         """
 253
 254         def elementNode(tagname, rd):
 255             element = minidom.Element(tagname)
 256             for key in rd.keys():
 257                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 258                     element.setAttribute(key, str(rd[key]))
 259                 elif isinstance(rd[key], dict):
 260                     child = elementNode(key, rd[key])
 261                     element.appendChild(child)
 262                 elif isinstance(rd[key], list):
 263                     for item in rd[key]:
 264                         if isinstance(item, dict):
 265                             child = elementNode(key, item)
 266                             element.appendChild(child)
 267                         elif isinstance(item, StringTypes) or isinstance(item, int):
 268                             child = minidom.Element(key)
 269                             text = minidom.Text()
 270                             text.data = item
 271                             child.appendChild(text)
 272                             element.appendChild(child)
 273             return element
 274
 275         # Minidom does not allow documents to have more then one
 276         # child, but elements may have many children. Because of
 277         # this, the document's root node will be the first key/value
 278         # pair in the dictionary.
 279         node = elementNode(rdict.keys()[0], rdict.values()[0])
 280         if include_doc:
 281             rootNode = minidom.Document()
 282             rootNode.appendChild(node)
 283         else:
 284             rootNode = node
 285         return rootNode
 286
 287
 288     def parseDict(self, rdict, include_doc = True):
 289         """
 290         Convert a dictionary into a dom object and store it.
 291         """
 292         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 293
 294
 295     def getDictsByTagName(self, tagname, dom = None):
 296         """
 297         Search the dom for all elements with the specified tagname
 298         and return them as a list of dicts
 299         """
 300         if not dom:
 301             dom = self.rootNode
 302         dicts = []
 303         doms = dom.getElementsByTagName(tagname)
 304         dictlist = [self.toDict(d) for d in doms]
 305         for item in dictlist:
 306             for value in item.values():
 307                 dicts.append(value)
 308         return dicts
 309
 310     def getDictByTagNameValue(self, tagname, value, dom = None):
 311         """
 312         Search the dom for the first element with the specified tagname
 313         and value and return it as a dict.
 314         """
 315         tempdict = {}
 316         if not dom:
 317             dom = self.rootNode
 318         dicts = self.getDictsByTagName(tagname, dom)
 319
 320         for rdict in dicts:
 321             if rdict.has_key('name') and rdict['name'] in [value]:
 322                 return rdict
 323
 324         return tempdict
 325
 326
 327     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 328         """
 329         Removes all elements where:
 330         1. tagname matches the element tag
 331         2. attribute matches the element attribte
 332         3. attribute value is in valuelist
 333         """
 334
 335         tempdict = {}
 336         if not dom:
 337             dom = self.rootNode
 338
 339         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 340             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 341                 dom.parentNode.removeChild(dom)
 342             if blacklist and dom.attributes.get(attribute).value in blacklist:
 343                 dom.parentNode.removeChild(dom)
 344
 345         if dom.hasChildNodes():
 346             for child in dom.childNodes:
 347                 self.filter(tagname, attribute, blacklist, whitelist, child)
 348
 349
 350     def merge(self, rspecs, tagname, dom=None):
 351         """
 352         Merge this rspec with the requested rspec based on the specified
 353         starting tag name. The start tag (and all of its children) will be merged
 354         """
 355         tempdict = {}
 356         if not dom:
 357             dom = self.rootNode
 358
 359         whitelist = []
 360         blacklist = []
 361
 362         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 363             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 364                 dom.parentNode.removeChild(dom)
 365             if blacklist and dom.attributes.get(attribute).value in blacklist:
 366                 dom.parentNode.removeChild(dom)
 367
 368         if dom.hasChildNodes():
 369             for child in dom.childNodes:
 370                 self.filter(tagname, attribute, blacklist, whitelist, child)
 371
 372     def validateDicts(self):
 373         types = {
 374             'EInt' : int,
 375             'EString' : str,
 376             'EByteArray' : list,
 377             'EBoolean' : bool,
 378             'EFloat' : float,
 379             'EDate' : date}
 380
 381
 382     def pprint(self, r = None, depth = 0):
 383         """
 384         Pretty print the dict
 385         """
 386         line = ""
 387         if r == None: r = self.dict
 388         # Set the dept
 389         for tab in range(0,depth): line += "    "
 390         # check if it's nested
 391         if type(r) == dict:
 392             for i in r.keys():
 393                 print line + "%s:" % i
 394                 self.pprint(r[i], depth + 1)
 395         elif type(r) in (tuple, list):
 396             for j in r: self.pprint(j, depth + 1)
 397         # not nested so just print.
 398         else:
 399             print line + "%s" %  r
 400
 401
 402
 403 class RecordSpec(RSpec):
 404
 405     root_tag = 'record'
 406     def parseDict(self, rdict, include_doc = False):
 407         """
 408         Convert a dictionary into a dom object and store it.
 409         """
 410         self.rootNode = self.dict2dom(rdict, include_doc)
 411
 412     def dict2dom(self, rdict, include_doc = False):
 413         record_dict = rdict
 414         if not len(rdict.keys()) == 1:
 415             record_dict = {self.root_tag : rdict}
 416         return RSpec.dict2dom(self, record_dict, include_doc)
 417
 418
 419 # vim:ts=4:expandtab
 420