sfa/util/rspec.py

   1 ### $Id$
   2 ### $URL$
   3
   4 import sys
   5 import pprint
   6 import os
   7 import httplib
   8 from xml.dom import minidom
   9 from types import StringTypes, ListType
  10
  11 class Rspec:
  12
  13     def __init__(self, xml = None, xsd = None, NSURL = None):
  14         '''
  15         Class to manipulate RSpecs.  Reads and parses rspec xml into python dicts
  16         and reads python dicts and writes rspec xml
  17
  18         self.xsd = # Schema.  Can be local or remote file.
  19         self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
  20         self.rootNode = # root of the DOM
  21         self.dict = # dict of the RSpec.
  22         self.schemaDict = {} # dict of the Schema
  23         '''
  24
  25         self.xsd = xsd
  26         self.rootNode = None
  27         self.dict = {}
  28         self.schemaDict = {}
  29         self.NSURL = NSURL
  30         if xml:
  31             if type(xml) == file:
  32                 self.parseFile(xml)
  33             if type(xml) == str:
  34                 self.parseString(xml)
  35             self.dict = self.toDict()
  36         if xsd:
  37             self._parseXSD(self.NSURL + self.xsd)
  38
  39
  40     def _getText(self, nodelist):
  41         rc = ""
  42         for node in nodelist:
  43             if node.nodeType == node.TEXT_NODE:
  44                 rc = rc + node.data
  45         return rc
  46
  47     # The rspec is comprised of 2 parts, and 1 reference:
  48     # attributes/elements describe individual resources
  49     # complexTypes are used to describe a set of attributes/elements
  50     # complexTypes can include a reference to other complexTypes.
  51
  52
  53     def _getName(self, node):
  54         '''Gets name of node. If tag has no name, then return tag's localName'''
  55         name = None
  56         if not node.nodeName.startswith("#"):
  57             if node.localName:
  58                 name = node.localName
  59             elif node.attributes.has_key("name"):
  60                 name = node.attributes.get("name").value
  61         return name
  62
  63
  64     # Attribute.  {name : nameofattribute, {items: values})
  65     def _attributeDict(self, attributeDom):
  66         '''Traverse single attribute node.  Create a dict {attributename : {name: value,}]}'''
  67         node = {} # parsed dict
  68         for attr in attributeDom.attributes.keys():
  69             node[attr] = attributeDom.attributes.get(attr).value
  70         return node
  71
  72
  73     def toDict(self, nodeDom = None):
  74         """
  75         convert this rspec to a dict and return it.
  76         """
  77         node = {}
  78         if not nodeDom:
  79              nodeDom = self.rootNode
  80
  81         elementName = nodeDom.nodeName
  82         if elementName and not elementName.startswith("#"):
  83             # attributes have tags and values.  get {tag: value}, else {type: value}
  84             node[elementName] = self._attributeDict(nodeDom)
  85             # resolve the child nodes.
  86             if nodeDom.hasChildNodes():
  87                 for child in nodeDom.childNodes:
  88                     childName = self._getName(child)
  89                     # skip null children
  90                     if not childName:
  91                         continue
  92                     # initialize the possible array of children
  93                     if not node[elementName].has_key(childName):
  94                         node[elementName][childName] = []
  95                     # if child node has text child nodes
  96                     # append the children to the array as strings
  97                     if child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
  98                         for nextchild in child.childNodes:
  99                             node[elementName][childName].append(nextchild.data)
 100                     # convert element child node to dict
 101                     else:
 102                         childdict = self.toDict(child)
 103                         for value in childdict.values():
 104                             node[elementName][childName].append(value)
 105                     #node[childName].append(self.toDict(child))
 106         return node
 107
 108
 109     def toxml(self):
 110         """
 111         convert this rspec to an xml string and return it.
 112         """
 113         return self.rootNode.toxml()
 114
 115
 116     def toprettyxml(self):
 117         """
 118         print this rspec in xml in a pretty format.
 119         """
 120         return self.rootNode.toprettyxml()
 121
 122
 123     def parseFile(self, filename):
 124         """
 125         read a local xml file and store it as a dom object.
 126         """
 127         dom = minidom.parse(filename)
 128         self.rootNode = dom.childNodes[0]
 129
 130
 131     def parseString(self, xml):
 132         """
 133         read an xml string and store it as a dom object.
 134         """
 135         xml = xml.replace('\n', '').replace('\t', '').strip()
 136         dom = minidom.parseString(xml)
 137         self.rootNode = dom.childNodes[0]
 138
 139
 140     def _httpGetXSD(self, xsdURI):
 141         # split the URI into relevant parts
 142         host = xsdURI.split("/")[2]
 143         if xsdURI.startswith("https"):
 144             conn = httplib.HTTPSConnection(host,
 145                 httplib.HTTPSConnection.default_port)
 146         elif xsdURI.startswith("http"):
 147             conn = httplib.HTTPConnection(host,
 148                 httplib.HTTPConnection.default_port)
 149         conn.request("GET", xsdURI)
 150         # If we can't download the schema, raise an exception
 151         r1 = conn.getresponse()
 152         if r1.status != 200:
 153             raise Exception
 154         return r1.read().replace('\n', '').replace('\t', '').strip()
 155
 156
 157     def _parseXSD(self, xsdURI):
 158         """
 159         Download XSD from URL, or if file, read local xsd file and set schemaDict
 160         """
 161         # Since the schema definiton is a global namespace shared by and agreed upon by
 162         # others, this should probably be a URL.  Check for URL, download xsd, parse, or
 163         # if local file, use local file.
 164         schemaDom = None
 165         if xsdURI.startswith("http"):
 166             try:
 167                 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
 168             except Exception, e:
 169                 # logging.debug("%s: web file not found" % xsdURI)
 170                 # logging.debug("Using local file %s" % self.xsd")
 171                 print e
 172                 print "Can't find %s on the web. Continuing." % xsdURI
 173         if not schemaDom:
 174             if os.path.exists(xsdURI):
 175                 # logging.debug("using local copy.")
 176                 print "Using local %s" % xsdURI
 177                 schemaDom = minidom.parse(xsdURI)
 178             else:
 179                 raise Exception("Can't find xsd locally")
 180         self.schemaDict = self.toDict(schemaDom.childNodes[0])
 181
 182
 183     def dict2dom(self, rdict, include_doc = False):
 184         """
 185         convert a dict object into a dom object.
 186         """
 187
 188         def elementNode(tagname, rd):
 189             element = minidom.Element(tagname)
 190             for key in rd.keys():
 191                 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
 192                     element.setAttribute(key, str(rd[key]))
 193                 elif isinstance(rd[key], dict):
 194                     child = elementNode(key, rd[key])
 195                     element.appendChild(child)
 196                 elif isinstance(rd[key], list):
 197                     for item in rd[key]:
 198                         if isinstance(item, dict):
 199                             child = elementNode(key, item)
 200                             element.appendChild(child)
 201                         elif isinstance(item, StringTypes) or isinstance(item, int):
 202                             child = minidom.Element(key)
 203                             text = minidom.Text()
 204                             text.data = item
 205                             child.appendChild(text)
 206                             element.appendChild(child)
 207             return element
 208
 209         # Minidom does not allow documents to have more then one
 210         # child, but elements may have many children. Because of
 211         # this, the document's root node will be the first key/value
 212         # pair in the dictionary.
 213         node = elementNode(rdict.keys()[0], rdict.values()[0])
 214         if include_doc:
 215             rootNode = minidom.Document()
 216             rootNode.appendChild(node)
 217         else:
 218             rootNode = node
 219         return rootNode
 220
 221
 222     def parseDict(self, rdict, include_doc = True):
 223         """
 224         Convert a dictionary into a dom object and store it.
 225         """
 226         self.rootNode = self.dict2dom(rdict, include_doc)
 227
 228
 229     def getDictsByTagName(self, tagname, dom = None):
 230         """
 231         Search the dom for all elements with the specified tagname
 232         and return them as a list of dicts
 233         """
 234         if not dom:
 235             dom = self.rootNode
 236         dicts = []
 237         doms = dom.getElementsByTagName(tagname)
 238         dictlist = [self.toDict(d) for d in doms]
 239         for item in dictlist:
 240             for value in item.values():
 241                 dicts.append(value)
 242         return dicts
 243
 244     def getDictByTagNameValue(self, tagname, value, dom = None):
 245         """
 246         Search the dom for the first element with the specified tagname
 247         and value and return it as a dict.
 248         """
 249         tempdict = {}
 250         if not dom:
 251             dom = self.rootNode
 252         dicts = self.getDictsByTagName(tagname, dom)
 253
 254         for rdict in dicts:
 255             if rdict.has_key('name') and rdict['name'] in [value]:
 256                 return rdict
 257
 258         return tempdict
 259
 260
 261     def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
 262         """
 263         Removes all elements where:
 264         1. tagname matches the element tag
 265         2. attribute matches the element attribte
 266         3. attribute value is in valuelist
 267         """
 268
 269         tempdict = {}
 270         if not dom:
 271             dom = self.rootNode
 272
 273         if dom.localName in [tagname] and dom.attributes.has_key(attribute):
 274             if whitelist and dom.attributes.get(attribute).value not in whitelist:
 275                 dom.parentNode.removeChild(dom)
 276             if blacklist and dom.attributes.get(attribute).value in blacklist:
 277                 dom.parentNode.removeChild(dom)
 278
 279         if dom.hasChildNodes():
 280             for child in dom.childNodes:
 281                 self.filter(tagname, attribute, blacklist, whitelist, child)
 282
 283
 284     def validateDicts(self):
 285         types = {
 286             'EInt' : int,
 287             'EString' : str,
 288             'EByteArray' : list,
 289             'EBoolean' : bool,
 290             'EFloat' : float,
 291             'EDate' : date}
 292
 293
 294     def pprint(self, r = None, depth = 0):
 295         """
 296         Pretty print the dict
 297         """
 298         line = ""
 299         if r == None: r = self.dict
 300         # Set the dept
 301         for tab in range(0,depth): line += "    "
 302         # check if it's nested
 303         if type(r) == dict:
 304             for i in r.keys():
 305                 print line + "%s:" % i
 306                 self.pprint(r[i], depth + 1)
 307         elif type(r) in (tuple, list):
 308             for j in r: self.pprint(j, depth + 1)
 309         # not nested so just print.
 310         else:
 311             print line + "%s" %  r
 312
 313
 314
 315 class RecordSpec(Rspec):
 316
 317     root_tag = 'record'
 318     def parseDict(self, rdict, include_doc = False):
 319         """
 320         Convert a dictionary into a dom object and store it.
 321         """
 322         self.rootNode = self.dict2dom(rdict, include_doc)
 323
 324     def dict2dom(self, rdict, include_doc = False):
 325         record_dict = rdict
 326         if not len(rdict.keys()) == 1:
 327             record_dict = {self.root_tag : rdict}
 328         return Rspec.dict2dom(self, record_dict, include_doc)
 329
 330
 331 # vim:ts=4:expandtab
 332