8 from xml.dom import minidom
9 from types import StringTypes, ListType
13 def __init__(self, xml = None, xsd = None, NSURL = None):
15 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
16 and reads python dicts and writes rspec xml
18 self.xsd = # Schema. Can be local or remote file.
19 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
20 self.rootNode = # root of the DOM
21 self.dict = # dict of the RSpec.
22 self.schemaDict = {} # dict of the Schema
33 if type(xml) in StringTypes:
35 self.dict = self.toDict()
37 self._parseXSD(self.NSURL + self.xsd)
40 def _getText(self, nodelist):
43 if node.nodeType == node.TEXT_NODE:
47 # The rspec is comprised of 2 parts, and 1 reference:
48 # attributes/elements describe individual resources
49 # complexTypes are used to describe a set of attributes/elements
50 # complexTypes can include a reference to other complexTypes.
53 def _getName(self, node):
54 '''Gets name of node. If tag has no name, then return tag's localName'''
56 if not node.nodeName.startswith("#"):
59 elif node.attributes.has_key("name"):
60 name = node.attributes.get("name").value
64 # Attribute. {name : nameofattribute, {items: values})
65 def _attributeDict(self, attributeDom):
66 '''Traverse single attribute node. Create a dict {attributename : {name: value,}]}'''
67 node = {} # parsed dict
68 for attr in attributeDom.attributes.keys():
69 node[attr] = attributeDom.attributes.get(attr).value
def appendToDictOrCreate(self, dict, key, value):
    '''Append value to the list stored under key, creating the list if needed.

    dict is modified in place and also returned, so callers may either
    ignore the result or rebind it. (The parameter name shadows the
    builtin, but it is part of the existing signature.)
    '''
    # setdefault covers both the "key exists" and "first value" cases.
    dict.setdefault(key, []).append(value)
    return dict
80 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
82 convert an XML to a nested dict:
83 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
84 * Terminal nodes (the rest) are nested dictionaries
90 curNodeName = nodeDom.localName
92 if (nodeDom.hasChildNodes()):
94 for attribute in nodeDom.attributes.keys():
95 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
96 for child in nodeDom.childNodes[:-1]:
98 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
100 childdict = self.toGenDict(child, None, childdict, curNodeName)
102 child = nodeDom.childNodes[-1]
103 if (child.nodeValue):
104 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
106 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
108 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
111 for attribute in nodeDom.attributes.keys():
112 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
114 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
116 if (parentdict is not None):
117 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
def toDict(self, nodeDom = None):
    '''
    convert this rspec to a dict and return it.

    nodeDom is the DOM node to convert; self.rootNode is used when it is
    omitted. The result maps the node's nodeName to the dict built by
    self._attributeDict(), extended with one list per child name (as
    reported by self._getName()) that collects the converted children.
    A node whose nodeName is empty or starts with "#" gives an empty dict.
    '''
    node = {}
    if not nodeDom:
        nodeDom = self.rootNode

    elementName = nodeDom.nodeName
    if not elementName or elementName.startswith("#"):
        return node

    # attributes have tags and values. get {tag: value}, else {type: value}
    entry = node[elementName] = self._attributeDict(nodeDom)

    # resolve the child nodes.
    for child in nodeDom.childNodes:
        childName = self._getName(child)
        # children for which no name is reported are skipped
        if not childName:
            continue

        # initialize the possible array of children
        values = entry.setdefault(childName, [])

        if isinstance(child, minidom.Text):
            # add if data is not empty
            if child.data.strip():
                values.append(child.data)
        else:
            # a converted child is {name: contents}; keep only the contents
            for value in self.toDict(child).values():
                values.append(value)
    return node
161 convert this rspec to an xml string and return it.
163 return self.rootNode.toxml()
def toprettyxml(self):
    '''
    Return this rspec as an xml string in a pretty (indented) format,
    as produced by the root node's own toprettyxml().
    '''
    root = self.rootNode
    return root.toprettyxml()
def parseFile(self, filename):
    '''
    read a local xml file and store its root element as self.rootNode.

    filename is handed straight to minidom.parse().
    '''
    dom = minidom.parse(filename)
    # documentElement is always the root element; childNodes[0] would be a
    # comment, doctype or processing instruction when one precedes the root.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    '''
    read an xml string and store its root element as self.rootNode.

    Newlines and tabs are removed from the string before it is parsed.
    '''
    # NOTE(review): dropping newlines/tabs outright also joins tokens that
    # were separated only by those characters -- confirm inputs never rely
    # on them as the sole separator.
    xml = xml.replace('\n', '').replace('\t', '').strip()
    dom = minidom.parseString(xml)
    # documentElement is always the root element; childNodes[0] would be a
    # comment, doctype or processing instruction when one precedes the root.
    self.rootNode = dom.documentElement
190 def _httpGetXSD(self, xsdURI):
191 # split the URI into relevant parts
192 host = xsdURI.split("/")[2]
193 if xsdURI.startswith("https"):
194 conn = httplib.HTTPSConnection(host,
195 httplib.HTTPSConnection.default_port)
196 elif xsdURI.startswith("http"):
197 conn = httplib.HTTPConnection(host,
198 httplib.HTTPConnection.default_port)
199 conn.request("GET", xsdURI)
200 # If we can't download the schema, raise an exception
201 r1 = conn.getresponse()
204 return r1.read().replace('\n', '').replace('\t', '').strip()
207 def _parseXSD(self, xsdURI):
209 Download XSD from URL, or if file, read local xsd file and set schemaDict
211 # Since the schema definition is a global namespace shared by and agreed upon by
212 # others, this should probably be a URL. Check for URL, download xsd, parse, or
213 # if local file, use local file.
215 if xsdURI.startswith("http"):
217 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
219 # logging.debug("%s: web file not found" % xsdURI)
220 # logging.debug("Using local file %s" % self.xsd")
222 print "Can't find %s on the web. Continuing." % xsdURI
224 if os.path.exists(xsdURI):
225 # logging.debug("using local copy.")
226 print "Using local %s" % xsdURI
227 schemaDom = minidom.parse(xsdURI)
229 raise Exception("Can't find xsd locally")
230 self.schemaDict = self.toDict(schemaDom.childNodes[0])
233 def dict2dom(self, rdict, include_doc = False):
235 convert a dict object into a dom object.
238 def elementNode(tagname, rd):
239 element = minidom.Element(tagname)
240 for key in rd.keys():
241 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
242 element.setAttribute(key, str(rd[key]))
243 elif isinstance(rd[key], dict):
244 child = elementNode(key, rd[key])
245 element.appendChild(child)
246 elif isinstance(rd[key], list):
248 if isinstance(item, dict):
249 child = elementNode(key, item)
250 element.appendChild(child)
251 elif isinstance(item, StringTypes) or isinstance(item, int):
252 child = minidom.Element(key)
253 text = minidom.Text()
255 child.appendChild(text)
256 element.appendChild(child)
259 # Minidom does not allow documents to have more than one
260 # child, but elements may have many children. Because of
261 # this, the document's root node will be the first key/value
262 # pair in the dictionary.
263 node = elementNode(rdict.keys()[0], rdict.values()[0])
265 rootNode = minidom.Document()
266 rootNode.appendChild(node)
def parseDict(self, rdict, include_doc = True):
    '''
    Build a dom from the dictionary via self.dict2dom() and keep the
    first child of the result as self.rootNode.
    '''
    built = self.dict2dom(rdict, include_doc)
    first_child = built.childNodes[0]
    self.rootNode = first_child
279 def getDictsByTagName(self, tagname, dom = None):
281 Search the dom for all elements with the specified tagname
282 and return them as a list of dicts
287 doms = dom.getElementsByTagName(tagname)
288 dictlist = [self.toDict(d) for d in doms]
289 for item in dictlist:
290 for value in item.values():
294 def getDictByTagNameValue(self, tagname, value, dom = None):
296 Search the dom for the first element with the specified tagname
297 and value and return it as a dict.
302 dicts = self.getDictsByTagName(tagname, dom)
305 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
    '''
    Removes all elements where:
    1. tagname matches the element tag
    2. attribute is present on the element
    3. the attribute value is in blacklist, or whitelist is non-empty
       and the attribute value is not in whitelist

    The walk starts at dom (self.rootNode when dom is not given) and
    visits every descendant. A removed element is detached from its
    parent together with its whole subtree. Neither list is modified.
    '''
    if not dom:
        dom = self.rootNode

    attributes = dom.attributes
    if dom.localName == tagname and attributes is not None and attribute in attributes:
        value = attributes.get(attribute).value
        rejected = bool(whitelist) and value not in whitelist
        rejected = rejected or (bool(blacklist) and value in blacklist)
        if rejected:
            # Remove exactly once, even when both lists reject the value,
            # and stop: the subtree is gone from the document.
            dom.parentNode.removeChild(dom)
            return

    # Walk a snapshot: removeChild() mutates childNodes, and iterating the
    # live list would skip the sibling that follows each removed element.
    for child in list(dom.childNodes):
        self.filter(tagname, attribute, blacklist, whitelist, child)
334 def merge(self, rspecs, tagname, dom=None):
336 Merge this rspec with the requested rspec based on the specified
337 starting tag name. The start tag (and all of its children) will be merged
346 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
347 if whitelist and dom.attributes.get(attribute).value not in whitelist:
348 dom.parentNode.removeChild(dom)
349 if blacklist and dom.attributes.get(attribute).value in blacklist:
350 dom.parentNode.removeChild(dom)
352 if dom.hasChildNodes():
353 for child in dom.childNodes:
354 self.filter(tagname, attribute, blacklist, whitelist, child)
356 def validateDicts(self):
def pprint(self, r = None, depth = 0):
    '''
    Pretty print the dict

    r is the value to print; self.dict is printed when r is omitted.
    depth is the nesting level and sets how many indent characters are
    put in front of each line. Dict keys are printed as "key:" with
    their value one level deeper; tuple and list items are printed one
    level deeper; anything else is printed as-is.
    '''
    if r is None:
        r = self.dict

    def emit(value, level):
        # The recursion lives here rather than in pprint() itself so that
        # a None stored inside the structure is printed as a value and is
        # not replaced by self.dict again, which used to recurse forever.
        line = " " * level
        # check if it's nested
        if type(value) == dict:
            for i in value.keys():
                print(line + "%s:" % (i,))
                emit(value[i], level + 1)
        elif type(value) in (tuple, list):
            for j in value:
                emit(j, level + 1)
        # not nested so just print.
        else:
            print(line + "%s" % (value,))

    emit(r, depth)
387 class RecordSpec(RSpec):
390 def parseDict(self, rdict, include_doc = False):
392 Convert a dictionary into a dom object and store it.
394 self.rootNode = self.dict2dom(rdict, include_doc)
396 def dict2dom(self, rdict, include_doc = False):
398 if not len(rdict.keys()) == 1:
399 record_dict = {self.root_tag : rdict}
400 return RSpec.dict2dom(self, record_dict, include_doc)