8 from xml.dom import minidom
9 from types import StringTypes, ListType
# Constructor.  All three arguments are optional; the statements visible below
# show that a string `xml` is detected by type, that the dict form of the rspec
# is cached on self.dict, and that the schema location is NSURL + xsd.
13 def __init__(self, xml = None, xsd = None, NSURL = None):
15 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
16 and reads python dicts and writes rspec xml
18 self.xsd = # Schema. Can be local or remote file.
19 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
20 self.rootNode = # root of the DOM
21 self.dict = # dict of the RSpec.
22 self.schemaDict = {} # dict of the Schema
# String input is recognised by its exact type being one of StringTypes.
33 if type(xml) in StringTypes:
# toDict() without an argument converts the tree rooted at self.rootNode.
35 self.dict = self.toDict()
# NOTE(review): NSURL defaults to None; if self.NSURL is still None here the
# concatenation raises TypeError (e.g. xsd given as a local file) -- confirm.
37 self._parseXSD(self.NSURL + self.xsd)
# Helper over a list of DOM nodes; presumably gathers the text they hold
# (only the node-type test is visible here -- confirm against the full body).
40 def _getText(self, nodelist):
# Only text nodes pass this test; every other node type is ignored by it.
43 if node.nodeType == node.TEXT_NODE:
47 # The rspec is comprised of 2 parts, and 1 reference:
48 # attributes/elements describe individual resources
49 # complexTypes are used to describe a set of attributes/elements
50 # complexTypes can include a reference to other complexTypes.
53 def _getName(self, node):
54 '''Gets name of node. If tag has no name, then return tag's localName'''
56 if not node.nodeName.startswith("#"):
59 elif node.attributes.has_key("name"):
60 name = node.attributes.get("name").value
64 # Attribute. {name : nameofattribute, {items: values})
65 def _attributeDict(self, attributeDom):
66 '''Traverse single attribute node. Create a dict {attributename : {name: value,}]}'''
67 node = {} # parsed dict
68 for attr in attributeDom.attributes.keys():
69 node[attr] = attributeDom.attributes.get(attr).value
def appendToDictOrCreate(self, dict, key, value):
    """
    Append `value` to the list stored under `key`, creating that list first
    when `key` is not present yet.

    `dict` is modified in place and is also returned, so calls can be
    written as  d = self.appendToDictOrCreate(d, key, value).
    """
    # setdefault() folds the membership test and the create/append branches
    # into one lookup and avoids has_key(), which Python 3 dicts do not have.
    dict.setdefault(key, []).append(value)
    return dict
# Recursive DOM-to-dict conversion; results are accumulated through
# appendToDictOrCreate().
# NOTE(review): siblingdict={} is a mutable default that is handed to
# appendToDictOrCreate(), which appends in place -- entries may carry over
# between calls that rely on the default.  Confirm and consider None.
80 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
82 convert an XML to a nested dict:
83 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
84 * Terminal nodes (the rest) are nested dictionaries
# Entries produced for this node are keyed by its namespace-local tag name.
90 curNodeName = nodeDom.localName
92 if (nodeDom.hasChildNodes()):
# Copy this node's attributes into childdict, one key per attribute name.
94 for attribute in nodeDom.attributes.keys():
95 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
# Every child except the last one; the last child is handled on its own below.
96 for child in nodeDom.childNodes[:-1]:
# Record the child's nodeValue under this node's name.
98 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
# Recurse into the child; its result is collected into childdict.
100 childdict = self.toGenDict(child, None, childdict, curNodeName)
# Last child: a truthy nodeValue is recorded directly, otherwise the recursion
# receives siblingdict as the parent dict to fill.
102 child = nodeDom.childNodes[-1]
103 if (child.nodeValue):
104 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
106 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
108 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
# Same attribute copy as above (presumably the branch for childless nodes --
# confirm), followed by filing the attribute dict under this node's name.
111 for attribute in nodeDom.attributes.keys():
112 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
114 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
# When a parent dict was supplied, file siblingdict under the parent's name.
116 if (parentdict is not None):
117 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
def toDict(self, nodeDom = None):
    """
    convert this rspec to a dict and return it.

    nodeDom -- DOM node to convert; defaults to self.rootNode.

    The result has the form
        {nodeName: {attribute: value, ..., childName: [item, ...]}}
    where each named child contributes to the list under its name either
    its pieces of character data (children holding character data only) or
    the dict obtained by converting the child recursively.  A node whose
    nodeName is empty or starts with "#" produces an empty dict.
    """
    node = {}
    if nodeDom is None:
        nodeDom = self.rootNode

    elementName = nodeDom.nodeName
    if not elementName or elementName.startswith("#"):
        return node

    # attributes have tags and values.  start from {tag: value}
    entry = self._attributeDict(nodeDom)
    node[elementName] = entry

    # resolve the child nodes.
    for child in nodeDom.childNodes:
        childName = self._getName(child)
        # skip children that have no usable name
        if not childName:
            continue

        # initialize the possible array of children
        values = entry.setdefault(childName, [])
        grandchildren = child.childNodes

        if isinstance(child, minidom.Text):
            # add if data is not empty (the data of this very node; the
            # former code read a loop variable that was not bound yet)
            if child.data.strip():
                values.append(child.data)
        elif (grandchildren
              and isinstance(grandchildren[0], minidom.Text)
              and not any(g.nodeType == g.ELEMENT_NODE for g in grandchildren)):
            # character data only: collect every piece
            values.extend(g.data for g in grandchildren)
        else:
            # Element children (possibly surrounded by indentation text, as
            # in pretty-printed xml) are converted recursively; elements
            # have no .data, so they must not reach the branch above.
            values.extend(self.toDict(child).values())

    return node
164 convert this rspec to an xml string and return it.
166 return self.rootNode.toxml()
def toprettyxml(self):
    """
    Return this rspec as indented, human-readable xml text.
    """
    root = self.rootNode
    return root.toprettyxml()
def parseFile(self, filename):
    """
    read a local xml file and store it as a dom object.

    filename -- path of the xml file, or an open file object (minidom.parse
    accepts both).

    self.rootNode is set to the root element of the parsed document.
    """
    dom = minidom.parse(filename)
    # documentElement is always the root element, whereas childNodes[0] is a
    # comment, DOCTYPE or processing instruction when the file starts with one.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    read an xml string and store it as a dom object.

    Every newline and tab is deleted from the text and the result is
    stripped before parsing.  self.rootNode is set to the root element of
    the parsed document.
    """
    flattened = xml.replace('\n', '').replace('\t', '').strip()
    dom = minidom.parseString(flattened)
    # documentElement is always the root element, whereas childNodes[0] is a
    # comment, DOCTYPE or processing instruction when the text starts with one.
    self.rootNode = dom.documentElement
# Fetch the schema at xsdURI with an HTTP(S) GET and return the response body
# with every newline and tab removed and surrounding whitespace stripped.
193 def _httpGetXSD(self, xsdURI):
194 # split the URI into relevant parts
# For "scheme://host/path" the third "/"-separated field is the host part.
195 host = xsdURI.split("/")[2]
# NOTE(review): a URI starting with neither prefix leaves `conn` unbound, so
# the request below would fail with NameError.
# NOTE(review): the class default port is passed explicitly even when `host`
# carries its own ":port" suffix -- confirm the suffix is still honoured.
196 if xsdURI.startswith("https"):
197 conn = httplib.HTTPSConnection(host,
198 httplib.HTTPSConnection.default_port)
199 elif xsdURI.startswith("http"):
200 conn = httplib.HTTPConnection(host,
201 httplib.HTTPConnection.default_port)
# The complete URI (not only its path) is sent as the request target.
202 conn.request("GET", xsdURI)
203 # If we can't download the schema, raise an exception
# NOTE(review): no conn.close() is visible in this method -- confirm the
# connection is released.
204 r1 = conn.getresponse()
207 return r1.read().replace('\n', '').replace('\t', '').strip()
# Loads the schema from a URL or a local path and stores its dict form in
# self.schemaDict.
210 def _parseXSD(self, xsdURI):
212 Download XSD from URL, or if file, read local xsd file and set schemaDict
214 # Since the schema definition is a global namespace shared by and agreed upon by
215 # others, this should probably be a URL. Check for URL, download xsd, parse, or
216 # if local file, use local file.
# Remote case: any URI starting with "http" (this also matches "https").
218 if xsdURI.startswith("http"):
# _httpGetXSD() returns the schema text, which is parsed into a DOM here.
220 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
222 # logging.debug("%s: web file not found" % xsdURI)
223 # logging.debug("Using local file %s" % self.xsd")
225 print "Can't find %s on the web. Continuing." % xsdURI
# Local case: xsdURI is treated as a filesystem path.
227 if os.path.exists(xsdURI):
228 # logging.debug("using local copy.")
229 print "Using local %s" % xsdURI
230 schemaDom = minidom.parse(xsdURI)
# Presumably the branch taken when the path does not exist -- confirm.
232 raise Exception("Can't find xsd locally")
# NOTE(review): childNodes[0] is the first top-level node of the document; it
# is not the root element when the schema starts with a comment or DOCTYPE
# (documentElement would be) -- confirm.
233 self.schemaDict = self.toDict(schemaDom.childNodes[0])
def dict2dom(self, rdict, include_doc = False):
    """
    convert a dict object into a dom object.

    rdict -- {roottag: {key: value, ...}}.  Inside every element dict
        * a string or int value becomes the attribute key="value",
        * a dict value becomes one child element named key,
        * a list value becomes one child element named key per item: dict
          items are converted recursively, string/int items become the
          text content of their child element.
      Values and items of any other type are ignored.
    include_doc -- when true the Document node holding the root element is
      returned, otherwise the root element itself.

    Only the first key/value pair of rdict is converted.  An empty rdict
    raises IndexError.
    """
    # basestring only exists on Python 2, where it covers the same types as
    # types.StringTypes; on Python 3 str is the only text type.
    try:
        text_types = (basestring,)
    except NameError:
        text_types = (str,)

    # Nodes are created through a Document rather than by instantiating
    # minidom.Element/Text directly: the factory methods are the supported
    # DOM route and set ownerDocument on every node they create.
    doc = minidom.Document()

    def is_scalar(value):
        return isinstance(value, text_types) or isinstance(value, int)

    def as_text(value):
        # strings pass through untouched so non-ascii text is not forced
        # through str(); only numbers need converting
        if isinstance(value, text_types):
            return value
        return str(value)

    def elementNode(tagname, rd):
        element = doc.createElement(tagname)
        for key, value in rd.items():
            if is_scalar(value):
                element.setAttribute(key, as_text(value))
            elif isinstance(value, dict):
                element.appendChild(elementNode(key, value))
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        element.appendChild(elementNode(key, item))
                    elif is_scalar(item):
                        child = doc.createElement(key)
                        child.appendChild(doc.createTextNode(as_text(item)))
                        element.appendChild(child)
        return element

    # Minidom does not allow documents to have more than one
    # child, but elements may have many children.  Because of
    # this, the document's root node will be the first key/value
    # pair in the dictionary.  (list() keeps the indexing valid on
    # Python 3, where items() is a view.)
    roottag, rootdict = list(rdict.items())[0]
    node = elementNode(roottag, rootdict)

    if not include_doc:
        return node
    doc.appendChild(node)
    return doc
def parseDict(self, rdict, include_doc = True):
    """
    Convert a dictionary into a dom object and store it.

    rdict, include_doc -- handed to dict2dom() unchanged.

    self.rootNode is set to the root element built from rdict.
    """
    dom = self.dict2dom(rdict, include_doc)
    # Only a Document needs unwrapping.  When dict2dom() hands back the root
    # element itself (RecordSpec.parseDict stores such a result directly),
    # taking childNodes[0] would store the root's first child -- or fail on a
    # childless root -- instead of the root.
    if dom.nodeType == dom.DOCUMENT_NODE:
        dom = dom.documentElement
    self.rootNode = dom
282 def getDictsByTagName(self, tagname, dom = None):
284 Search the dom for all elements with the specified tagname
285 and return them as a list of dicts
# Collect every matching element found by the DOM search on `dom`.
290 doms = dom.getElementsByTagName(tagname)
# toDict() wraps each element as {nodeName: {...}}.
291 dictlist = [self.toDict(d) for d in doms]
# Walk the inner dicts, i.e. the values of each wrapper produced above.
292 for item in dictlist:
293 for value in item.values():
297 def getDictByTagNameValue(self, tagname, value, dom = None):
299 Search the dom for the first element with the specified tagname
300 and value and return it as a dict.
# Candidates are all elements with the tag name, already in dict form.
305 dicts = self.getDictsByTagName(tagname, dom)
# The match is made on the dict's 'name' entry; `x in [value]` is an equality
# test against `value`.
308 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = (), whitelist = (), dom = None):
    """
    Removes all elements below (and including) dom where:
    1. tagname matches the element tag
    2. the element has the attribute
    3. the attribute value is rejected by the lists, i.e. it is missing
       from a non-empty whitelist or present in a non-empty blacklist

    tagname   -- local tag name of the elements to examine
    attribute -- name of the attribute whose value is checked
    blacklist -- values to remove (ignored when empty)
    whitelist -- values to keep (ignored when empty)
    dom       -- node to start from; defaults to self.rootNode

    Elements are detached from their parent in place; nothing is returned.
    """
    if dom is None:
        dom = self.rootNode

    # First collect, then remove: detaching nodes while their parent's
    # childNodes list is being iterated makes the iteration skip siblings.
    rejected = []
    pending = [dom]
    while pending:
        node = pending.pop()
        if node.localName == tagname and node.attributes is not None:
            attr = node.attributes.get(attribute)
            if attr is not None:
                value = attr.value
                if ((whitelist and value not in whitelist)
                        or (blacklist and value in blacklist)):
                    # the whole subtree goes away, no need to look inside
                    rejected.append(node)
                    continue
        pending.extend(node.childNodes)

    # Each rejected node is removed exactly once, even when both lists
    # reject it.
    for node in rejected:
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
# NOTE(review): the statements visible below mirror filter(): they read
# `attribute`, `whitelist` and `blacklist`, none of which is a parameter of
# merge(), and `rspecs` is never used by them.  Unless those names are bound
# elsewhere in this method, calling it raises NameError -- this looks like an
# unfinished copy of filter(); confirm before relying on it.
337 def merge(self, rspecs, tagname, dom=None):
339 Merge this rspec with the requested rspec based on the specified
340 starting tag name. The start tag (and all of its children) will be merged
349 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
350 if whitelist and dom.attributes.get(attribute).value not in whitelist:
351 dom.parentNode.removeChild(dom)
352 if blacklist and dom.attributes.get(attribute).value in blacklist:
353 dom.parentNode.removeChild(dom)
355 if dom.hasChildNodes():
356 for child in dom.childNodes:
# Recurses through filter(), not through merge().
357 self.filter(tagname, attribute, blacklist, whitelist, child)
359 def validateDicts(self):
# Recursive printer: `r` is the value to print, `depth` its nesting level.
369 def pprint(self, r = None, depth = 0):
371 Pretty print the dict
# Without an argument, print the dict cached on the instance.
374 if r == None: r = self.dict
# Build the indentation prefix: one unit per nesting level.
376 for tab in range(0,depth): line += " "
377 # check if it's nested
# Print the key, then its value one level deeper.
380 print line + "%s:" % i
381 self.pprint(r[i], depth + 1)
# Sequences print no line of their own; each item is printed one level deeper.
382 elif type(r) in (tuple, list):
383 for j in r: self.pprint(j, depth + 1)
384 # not nested so just print.
386 print line + "%s" % r
# RSpec variant whose dict2dom() can wrap a dict under a single root tag
# (self.root_tag) before delegating to RSpec.dict2dom().
390 class RecordSpec(RSpec):
393 def parseDict(self, rdict, include_doc = False):
395 Convert a dictionary into a dom object and store it.
# Unlike RSpec.parseDict, the dict2dom() result is stored as-is (no
# childNodes[0] unwrapping) and include_doc defaults to False.
397 self.rootNode = self.dict2dom(rdict, include_doc)
399 def dict2dom(self, rdict, include_doc = False):
# A dict that does not have exactly one top-level key is wrapped under
# self.root_tag, giving the base class a single root to convert.
401 if not len(rdict.keys()) == 1:
402 record_dict = {self.root_tag : rdict}
# The conversion itself is done by the base class.
403 return RSpec.dict2dom(self, record_dict, include_doc)