sfa/util/rspec.py

   1 import sys
   2 import pprint
   3 import os
   4 from StringIO import StringIO
   5 from types import StringTypes, ListType
   6 import httplib
   7 from xml.dom import minidom
   8 from lxml import etree
   9 import codecs
  10 from sfa.util.sfalogging import info_logger
  11
  12 class RSpec:
  13
  14     def __init__(self, xml = None, xsd = None, NSURL = None):
  15         '''
  16         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  17         and reads python dicts and writes rspec xml
  18
  19         self.xsd = # Schema.  Can be local or remote file.
  20         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  21         self.rootNode = # root of the DOM
  22         self.dict = # dict of the RSpec.
  23         self.schemaDict = {} # dict of the Schema
  24         '''
  25
  26         self.xsd = xsd
  27         self.rootNode = None
  28         self.dict = {}
  29         self.schemaDict = {}
  30         self.NSURL = NSURL
  31         if xml:
  32             if type(xml) == file:
  33                 self.parseFile(xml)
  34             if type(xml) in StringTypes:
  35                 self.parseString(xml)
  36             self.dict = self.toDict()
  37         if xsd:
  38             self._parseXSD(self.NSURL + self.xsd)
  39
  40
  41     def _getText(self, nodelist):
  42         rc = ""
  43         for node in nodelist:
  44             if node.nodeType == node.TEXT_NODE:
  45                 rc = rc + node.data
  46         return rc
  47
  48     # The rspec is comprised of 2 parts, and 1 reference:
  49     # attributes/elements describe individual resources
  50     # complexTypes are used to describe a set of attributes/elements
  51     # complexTypes can include a reference to other complexTypes.
  52
  53
  54     def _getName(self, node):
  55         '''Gets name of node. If tag has no name, then return tag's localName'''
  56         name = None
  57         if not node.nodeName.startswith("#"):
  58             if node.localName:
  59                 name = node.localName
  60             elif node.attributes.has_key("name"):
  61                 name = node.attributes.get("name").value
  62         return name
  63
  64
  65     # Attribute.  {name : nameofattribute, {items: values})
  66     def _attributeDict(self, attributeDom):
  67         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  68         node = {} # parsed dict
  69         for attr in attributeDom.attributes.keys():
  70             node[attr] = attributeDom.attributes.get(attr).value
  71         return node
  72
  73
  74     def appendToDictOrCreate(self, dict, key, value):
  75         if (dict.has_key(key)):
  76             dict[key].append(value)
  77         else:
  78             dict[key]=[value]
  79         return dict
  80
  81     def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
  82         """
  83         convert an XML to a nested dict:
  84           * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
  85           * Terminal nodes (the rest) are nested dictionaries
  86         """
  87
  88         if (not nodeDom):
  89             nodeDom=self.rootNode
  90
  91         curNodeName = nodeDom.localName
  92
  93         if (nodeDom.hasChildNodes()):
  94             childdict={}
  95             for attribute in nodeDom.attributes.keys():
  96                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
  97             for child in nodeDom.childNodes[:-1]:
  98                 if (child.nodeValue):
  99                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 100                 else:
 101                     childdict = self.toGenDict(child, None, childdict, curNodeName)
 102
 103             child = nodeDom.childNodes[-1]
 104             if (child.nodeValue):
 105                 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
 106                 if (childdict):
 107                     siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 108             else:
 109                 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
 110         else:
 111             childdict={}
 112             for attribute in nodeDom.attributes.keys():
 113                 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
 114
 115             self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
 116
 117         if (parentdict is not None):
 118             parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
 119             return parentdict
 120         else:
 121             return siblingdict
 122
 123
 124
 125     def toDict(self, nodeDom = None):
 126         """
 127         convert this rspec to a dict and return it.
 128         """
 129         node = {}
 130         if not nodeDom:
 131              nodeDom = self.rootNode
 132
 133         elementName = nodeDom.nodeName
 134         if elementName and not elementName.startswith("#"):
 135             # attributes have tags and values.  get {tag: value}, else {type: value}
 136             node[elementName] = self._attributeDict(nodeDom)
 137             # resolve the child nodes.
 138             if nodeDom.hasChildNodes():
 139                 for child in nodeDom.childNodes:
 140                     childName = self._getName(child)
 141
 142                     # skip null children
 143                     if not childName: continue
 144
 145                     # initialize the possible array of children
 146                     if not node[elementName].has_key(childName): node[elementName][childName] = []
 147
 148                     if isinstance(child, minidom.Text):
 149                         # add if data is not empty
 150                         if child.data.strip():
 151                             node[elementName][childName].append(nextchild.data)
 152                     elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
 153                         for nextchild in child.childNodes:
 154                             node[elementName][childName].append(nextchild.data)
 155                     else:
 156                         childdict = self.toDict(child)
 157                         for value in childdict.values():
 158                             node[elementName][childName].append(value)
 159
 160         return node
 161
 162
 163     def toxml(self):
 164         """
 165         convert this rspec to an xml string and return it.
 166         """
 167         return self.rootNode.toxml()
 168
 169
 170     def toprettyxml(self):
 171         """
 172         print this rspec in xml in a pretty format.
 173         """
 174         return self.rootNode.toprettyxml()
 175
 176
 177     def __removeWhitespaceNodes(self, parent):
 178         for child in list(parent.childNodes):
 179             if child.nodeType == minidom.Node.TEXT_NODE and child.data.strip() == '':
 180                 parent.removeChild(child)
 181             else:
 182                 self.__removeWhitespaceNodes(child)
 183
 184     def parseFile(self, filename):
 185         """
 186         read a local xml file and store it as a dom object.
 187         """
 188         dom = minidom.parse(filename)
 189         self.__removeWhitespaceNodes(dom)
 190         self.rootNode = dom.childNodes[0]
 191
 192
 193     def parseString(self, xml):
 194         """
 195         read an xml string and store it as a dom object.
 196         """
 197         print>>sys.stderr, "\r\n \t RSPEC.PY parseString xml \r\n %s " %(xml)
 198         #xmlUnicode = unicode( xml, 'utf-8' )
 199         xml = (xml.encode("utf-8"))
 200         dom = minidom.parseString(xml)
 201         print>>sys.stderr, "\r\n \t RSPEC.PY OKKK parseString dom \r\n %s " %(dom)
 202         self.__removeWhitespaceNodes(dom)
 203         self.rootNode = dom.childNodes[0]
 204
 205
 206     def _httpGetXSD(self, xsdURI):
 207         # split the URI into relevant parts
 208         host = xsdURI.split("/")[2]
 209         if xsdURI.startswith("https"):
 210             conn = httplib.HTTPSConnection(host,
 211                 httplib.HTTPSConnection.default_port)
 212         elif xsdURI.startswith("http"):
 213             conn = httplib.HTTPConnection(host,
 214                 httplib.HTTPConnection.default_port)
 215         conn.request("GET", xsdURI)
 216         # If we can't download the schema, raise an exception
 217         r1 = conn.getresponse()
 218         if r1.status != 200:
 219             raise Exception
 220         return r1.read().replace('\n', '').replace('\t', '').strip()
 221
 222
 223     def _parseXSD(self, xsdURI):
 224         """
 225         Download XSD from URL, or if file, read local xsd file and set
 226         schemaDict.
 227
 228         Since the schema definiton is a global namespace shared by and
 229         agreed upon by others, this should probably be a URL.  Check
 230         for URL, download xsd, parse, or if local file, use that.
 231         """
 232         schemaDom = None
 233         if xsdURI.startswith("http"):
 234             try:
 235                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 236             except Exception, e:
 237                 # logging.debug("%s: web file not found" % xsdURI)
 238                 # logging.debug("Using local file %s" % self.xsd")
 239                 info_logger.log_exc("rspec.parseXSD: can't find %s on the web. Continuing." % xsdURI)
 240         if not schemaDom:
 241             if os.path.exists(xsdURI):
 242                 # logging.debug("using local copy.")
 243                 info_logger.debug("rspec.parseXSD: Using local %s" % xsdURI)
 244                 schemaDom = minidom.parse(xsdURI)
 245             else:
 246                 raise Exception("rspec.parseXSD: can't find xsd locally")
 247         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 248
 249
 250     def dict2dom(self, rdict, include_doc = False):
 251         """
 252         convert a dict object into a dom object.
 253         """
 254
 255         def elementNode(tagname, rd):
 256             element = minidom.Element(tagname)
 257             for key in rd.keys():
 258                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 259                     element.setAttribute(key, unicode(rd[key]))
 260                 elif isinstance(rd[key], dict):
 261                     child = elementNode(key, rd[key])
 262                     element.appendChild(child)
 263                 elif isinstance(rd[key], list):
 264                     for item in rd[key]:
 265                         if isinstance(item, dict):
 266                             child = elementNode(key, item)
 267                             element.appendChild(child)
 268                         elif isinstance(item, StringTypes) or isinstance(item, int):
 269                             child = minidom.Element(key)
 270                             text = minidom.Text()
 271                             text.data = item
 272                             child.appendChild(text)
 273                             element.appendChild(child)
 274             return element
 275
 276         # Minidom does not allow documents to have more then one
 277         # child, but elements may have many children. Because of
 278         # this, the document's root node will be the first key/value
 279         # pair in the dictionary.
 280         node = elementNode(rdict.keys()[0], rdict.values()[0])
 281         if include_doc:
 282             rootNode = minidom.Document()
 283             rootNode.appendChild(node)
 284         else:
 285             rootNode = node
 286         return rootNode
 287
 288
 289     def parseDict(self, rdict, include_doc = True):
 290         """
 291         Convert a dictionary into a dom object and store it.
 292         """
 293         self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
 294
 295
 296     def getDictsByTagName(self, tagname, dom = None):
 297         """
 298         Search the dom for all elements with the specified tagname
 299         and return them as a list of dicts
 300         """
 301         if not dom:
 302             dom = self.rootNode
 303         dicts = []
 304         doms = dom.getElementsByTagName(tagname)
 305         dictlist = [self.toDict(d) for d in doms]
 306         for item in dictlist:
 307             for value in item.values():
 308                 dicts.append(value)
 309         return dicts
 310
 311     def getDictByTagNameValue(self, tagname, value, dom = None):
 312         """
 313         Search the dom for the first element with the specified tagname
 314         and value and return it as a dict.
 315         """
 316         tempdict = {}
 317         if not dom:
 318             dom = self.rootNode
 319         dicts = self.getDictsByTagName(tagname, dom)
 320
 321         for rdict in dicts:
 322             if rdict.has_key('name') and rdict['name'] in [value]:
 323                 return rdict
 324
 325         return tempdict
 326
 327
 328     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 329         """
 330         Removes all elements where:
 331         1. tagname matches the element tag
 332         2. attribute matches the element attribte
 333         3. attribute value is in valuelist
 334         """
 335
 336         tempdict = {}
 337         if not dom:
 338             dom = self.rootNode
 339
 340         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 341             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 342                 dom.parentNode.removeChild(dom)
 343             if blacklist and dom.attributes.get(attribute).value in blacklist:
 344                 dom.parentNode.removeChild(dom)
 345
 346         if dom.hasChildNodes():
 347             for child in dom.childNodes:
 348                 self.filter(tagname, attribute, blacklist, whitelist, child)
 349
 350
    def merge(self, rspecs, tagname, dom=None):
        """
        Merge this rspec with the requested rspec based on the specified
        starting tag name. The start tag (and all of its children) will be merged

        NOTE(review): no merging is implemented here.  The body mirrors
        filter(): `rspecs` is never read, and `attribute` is not defined in
        this scope, so any path that evaluates it raises NameError.
        """
        # unused
        tempdict = {}
        if not dom:
            dom = self.rootNode

        # Both lists stay empty, so neither removal below can trigger.
        whitelist = []
        blacklist = []

        # NOTE(review): `attribute` is undefined; it is evaluated (and raises
        # NameError) whenever dom.localName equals tagname.
        if dom.localName in [tagname] and dom.attributes.has_key(attribute):
            if whitelist and dom.attributes.get(attribute).value not in whitelist:
                dom.parentNode.removeChild(dom)
            if blacklist and dom.attributes.get(attribute).value in blacklist:
                dom.parentNode.removeChild(dom)

        # NOTE(review): same undefined `attribute` here, reached as soon as
        # dom has at least one child.
        if dom.hasChildNodes():
            for child in dom.childNodes:
                self.filter(tagname, attribute, blacklist, whitelist, child)
 372
 373     def validateDicts(self):
 374         types = {
 375             'EInt' : int,
 376             'EString' : str,
 377             'EByteArray' : list,
 378             'EBoolean' : bool,
 379             'EFloat' : float,
 380             'EDate' : date}
 381
 382
 383     def pprint(self, r = None, depth = 0):
 384         """
 385         Pretty print the dict
 386         """
 387         line = ""
 388         if r == None: r = self.dict
 389         # Set the dept
 390         for tab in range(0,depth): line += "    "
 391         # check if it's nested
 392         if type(r) == dict:
 393             for i in r.keys():
 394                 print line + "%s:" % i
 395                 self.pprint(r[i], depth + 1)
 396         elif type(r) in (tuple, list):
 397             for j in r: self.pprint(j, depth + 1)
 398         # not nested so just print.
 399         else:
 400             print line + "%s" %  r
 401
 402
 403
 404 class RecordSpec(RSpec):
 405
 406     root_tag = 'record'
 407     def parseDict(self, rdict, include_doc = False):
 408         """
 409         Convert a dictionary into a dom object and store it.
 410         """
 411         self.rootNode = self.dict2dom(rdict, include_doc)
 412
 413     def dict2dom(self, rdict, include_doc = False):
 414         record_dict = rdict
 415         if not len(rdict.keys()) == 1:
 416             record_dict = {self.root_tag : rdict}
 417         return RSpec.dict2dom(self, record_dict, include_doc)
 418
 419
 420 # vim:ts=4:expandtab
 421