8 from xml.dom import minidom
9 from types import StringTypes, ListType
13 def __init__(self, xml = None, xsd = None, NSURL = None):
15 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
16 and reads python dicts and writes rspec xml
18 self.xsd = # Schema. Can be local or remote file.
19 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
20 self.rootNode = # root of the DOM
21 self.dict = # dict of the RSpec.
22 self.schemaDict = {} # dict of the Schema
35 self.dict = self.toDict()
37 self._parseXSD(self.NSURL + self.xsd)
40 def _getText(self, nodelist):
43 if node.nodeType == node.TEXT_NODE:
47 # The rspec is comprised of 2 parts, and 1 reference:
48 # attributes/elements describe individual resources
49 # complexTypes are used to describe a set of attributes/elements
50 # complexTypes can include a reference to other complexTypes.
53 def _getName(self, node):
54 '''Gets name of node. If tag has no name, then return tag's localName'''
56 if not node.nodeName.startswith("#"):
59 elif node.attributes.has_key("name"):
60 name = node.attributes.get("name").value
64 # Attribute. {name : nameofattribute, {items: values})
65 def _attributeDict(self, attributeDom):
66 '''Traverse single attribute node. Create a dict {attributename : {name: value,}]}'''
67 node = {} # parsed dict
68 for attr in attributeDom.attributes.keys():
69 node[attr] = attributeDom.attributes.get(attr).value
73 def appendToDictOrCreate(self, dict, key, value):
74 if (dict.has_key(key)):
75 dict[key].append(value)
80 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
82 convert an XML to a nested dict:
83 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
84 * Terminal nodes (the rest) are nested dictionaries
90 curNodeName = nodeDom.localName
92 if (nodeDom.hasChildNodes()):
94 for attribute in nodeDom.attributes.keys():
95 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
96 for child in nodeDom.childNodes[:-1]:
98 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
100 childdict = self.toGenDict(child, None, childdict, curNodeName)
102 child = nodeDom.childNodes[-1]
103 if (child.nodeValue):
104 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
106 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
108 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
111 for attribute in nodeDom.attributes.keys():
112 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
114 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
116 if (parentdict is not None):
117 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
124 def toDict(self, nodeDom = None):
126 convert this rspec to a dict and return it.
130 nodeDom = self.rootNode
132 elementName = nodeDom.nodeName
133 if elementName and not elementName.startswith("#"):
134 # attributes have tags and values. get {tag: value}, else {type: value}
135 node[elementName] = self._attributeDict(nodeDom)
136 # resolve the child nodes.
137 if nodeDom.hasChildNodes():
138 for child in nodeDom.childNodes:
139 childName = self._getName(child)
143 # initialize the possible array of children
144 if not node[elementName].has_key(childName):
145 node[elementName][childName] = []
146 # if child node has text child nodes
147 # append the children to the array as strings
148 if child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
149 for nextchild in child.childNodes:
150 node[elementName][childName].append(nextchild.data)
151 # convert element child node to dict
153 childdict = self.toDict(child)
154 for value in childdict.values():
155 node[elementName][childName].append(value)
156 #node[childName].append(self.toDict(child))
162 convert this rspec to an xml string and return it.
164 return self.rootNode.toxml()
def toprettyxml(self):
    """
    Return this rspec as an indented, human-readable xml string.

    The formatting is delegated to the toprettyxml() method of the
    node stored in self.rootNode.
    """
    root = self.rootNode
    pretty = root.toprettyxml()
    return pretty
def parseFile(self, filename):
    """
    Read a local xml file and store its root element as the dom object.

    filename is passed straight to minidom.parse. The root element of
    the parsed document is stored in self.rootNode; nothing is returned.
    """
    dom = minidom.parse(filename)
    # documentElement is always the root element. The first child of the
    # document may instead be a leading comment, doctype or processing
    # instruction, which must not end up as the root node.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    Read an xml string and store its root element as the dom object.

    All newline and tab characters are removed from the string and the
    result is stripped before it is handed to minidom.parseString. The
    root element of the parsed document is stored in self.rootNode;
    nothing is returned.
    """
    # NOTE(review): the characters are deleted, not replaced by a space, so
    # tokens separated only by a newline or tab are joined together.
    # Presumably this is meant to drop formatting whitespace between tags;
    # kept unchanged because other code may depend on the resulting dom.
    xml = xml.replace('\n', '').replace('\t', '').strip()
    dom = minidom.parseString(xml)
    # documentElement is always the root element. The first child of the
    # document may instead be a leading comment, doctype or processing
    # instruction, which must not end up as the root node.
    self.rootNode = dom.documentElement
191 def _httpGetXSD(self, xsdURI):
192 # split the URI into relevant parts
193 host = xsdURI.split("/")[2]
194 if xsdURI.startswith("https"):
195 conn = httplib.HTTPSConnection(host,
196 httplib.HTTPSConnection.default_port)
197 elif xsdURI.startswith("http"):
198 conn = httplib.HTTPConnection(host,
199 httplib.HTTPConnection.default_port)
200 conn.request("GET", xsdURI)
201 # If we can't download the schema, raise an exception
202 r1 = conn.getresponse()
205 return r1.read().replace('\n', '').replace('\t', '').strip()
208 def _parseXSD(self, xsdURI):
210 Download XSD from URL, or if file, read local xsd file and set schemaDict
212 # Since the schema definition is a global namespace shared by and agreed upon by
213 # others, this should probably be a URL. Check for URL, download xsd, parse, or
214 # if local file, use local file.
216 if xsdURI.startswith("http"):
218 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
220 # logging.debug("%s: web file not found" % xsdURI)
221 # logging.debug("Using local file %s" % self.xsd")
223 print "Can't find %s on the web. Continuing." % xsdURI
225 if os.path.exists(xsdURI):
226 # logging.debug("using local copy.")
227 print "Using local %s" % xsdURI
228 schemaDom = minidom.parse(xsdURI)
230 raise Exception("Can't find xsd locally")
231 self.schemaDict = self.toDict(schemaDom.childNodes[0])
234 def dict2dom(self, rdict, include_doc = False):
236 convert a dict object into a dom object.
239 def elementNode(tagname, rd):
240 element = minidom.Element(tagname)
241 for key in rd.keys():
242 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
243 element.setAttribute(key, str(rd[key]))
244 elif isinstance(rd[key], dict):
245 child = elementNode(key, rd[key])
246 element.appendChild(child)
247 elif isinstance(rd[key], list):
249 if isinstance(item, dict):
250 child = elementNode(key, item)
251 element.appendChild(child)
252 elif isinstance(item, StringTypes) or isinstance(item, int):
253 child = minidom.Element(key)
254 text = minidom.Text()
256 child.appendChild(text)
257 element.appendChild(child)
260 # Minidom does not allow documents to have more than one
261 # child, but elements may have many children. Because of
262 # this, the document's root node will be the first key/value
263 # pair in the dictionary.
264 node = elementNode(rdict.keys()[0], rdict.values()[0])
266 rootNode = minidom.Document()
267 rootNode.appendChild(node)
def parseDict(self, rdict, include_doc = True):
    """
    Build a dom object from a dictionary and keep it as the root node.

    rdict and include_doc are forwarded unchanged to self.dict2dom; the
    value it returns is stored in self.rootNode. Nothing is returned.
    """
    converted = self.dict2dom(rdict, include_doc)
    self.rootNode = converted
280 def getDictsByTagName(self, tagname, dom = None):
282 Search the dom for all elements with the specified tagname
283 and return them as a list of dicts
288 doms = dom.getElementsByTagName(tagname)
289 dictlist = [self.toDict(d) for d in doms]
290 for item in dictlist:
291 for value in item.values():
295 def getDictByTagNameValue(self, tagname, value, dom = None):
297 Search the dom for the first element with the specified tagname
298 and value and return it as a dict.
303 dicts = self.getDictsByTagName(tagname, dom)
306 if rdict.has_key('name') and rdict['name'] in [value]:
312 def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
314 Removes all elements where:
315 1. tagname matches the element tag
316 2. attribute matches the element attribute
317 3. attribute value is in valuelist
324 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
325 if whitelist and dom.attributes.get(attribute).value not in whitelist:
326 dom.parentNode.removeChild(dom)
327 if blacklist and dom.attributes.get(attribute).value in blacklist:
328 dom.parentNode.removeChild(dom)
330 if dom.hasChildNodes():
331 for child in dom.childNodes:
332 self.filter(tagname, attribute, blacklist, whitelist, child)
335 def validateDicts(self):
345 def pprint(self, r = None, depth = 0):
347 Pretty print the dict
350 if r == None: r = self.dict
352 for tab in range(0,depth): line += " "
353 # check if it's nested
356 print line + "%s:" % i
357 self.pprint(r[i], depth + 1)
358 elif type(r) in (tuple, list):
359 for j in r: self.pprint(j, depth + 1)
360 # not nested so just print.
362 print line + "%s" % r
366 class RecordSpec(Rspec):
def parseDict(self, rdict, include_doc = False):
    """
    Build a dom object from a dictionary and keep it as the root node.

    rdict and include_doc are forwarded unchanged to self.dict2dom; the
    value it returns is stored in self.rootNode. Nothing is returned.
    """
    converted = self.dict2dom(rdict, include_doc)
    self.rootNode = converted
375 def dict2dom(self, rdict, include_doc = False):
377 if not len(rdict.keys()) == 1:
378 record_dict = {self.root_tag : rdict}
379 return Rspec.dict2dom(self, record_dict, include_doc)