8 from xml.dom import minidom
9 from types import StringTypes, ListType
13 def __init__(self, xml = None, xsd = None, NSURL = None):
15 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
16 and reads python dicts and writes rspec xml
18 self.xsd = # Schema. Can be local or remote file.
19 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
20 self.rootNode = # root of the DOM
21 self.dict = # dict of the RSpec.
22 self.schemaDict = {} # dict of the Schema
35 self.dict = self.toDict()
37 self._parseXSD(self.NSURL + self.xsd)
40 def _getText(self, nodelist):
43 if node.nodeType == node.TEXT_NODE:
47 # The rspec is comprised of 2 parts, and 1 reference:
48 # attributes/elements describe individual resources
49 # complexTypes are used to describe a set of attributes/elements
50 # complexTypes can include a reference to other complexTypes.
53 def _getName(self, node):
54 '''Gets name of node. If tag has no name, then return tag's localName'''
56 if not node.nodeName.startswith("#"):
59 elif node.attributes.has_key("name"):
60 name = node.attributes.get("name").value
64 # Attribute. {name : nameofattribute, {items: values})
65 def _attributeDict(self, attributeDom):
66 '''Traverse single attribute node. Create a dict {attributename : {name: value,}]}'''
67 node = {} # parsed dict
68 for attr in attributeDom.attributes.keys():
69 node[attr] = attributeDom.attributes.get(attr).value
def appendToDictOrCreate(self, dict, key, value):
    """
    Append ``value`` to the list stored under ``key``, creating that list
    first when the key is not present yet.

    Args:
        dict:  the dictionary to update; it is modified in place.  (The
               parameter name shadows the builtin but is kept so that
               keyword callers keep working.)
        key:   the key whose list receives the value.
        value: the item to append.

    Returns:
        The same dictionary object that was passed in.
    """
    # setdefault replaces the deprecated has_key() test and does a single
    # lookup for both the "exists" and the "create" case.
    dict.setdefault(key, []).append(value)
    return dict
80 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
82 convert an XML to a nested dict:
83 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
84 * Terminal nodes (the rest) are nested dictionaries
90 curNodeName = nodeDom.localName
92 if (nodeDom.hasChildNodes()):
94 for attribute in nodeDom.attributes.keys():
95 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
96 for child in nodeDom.childNodes[:-1]:
98 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
100 childdict = self.toGenDict(child, None, childdict, curNodeName)
102 child = nodeDom.childNodes[-1]
103 if (child.nodeValue):
104 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
106 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
108 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
111 for attribute in nodeDom.attributes.keys():
112 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
114 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
116 if (parentdict is not None):
117 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
124 def toDict(self, nodeDom = None):
126 convert this rspec to a dict and return it.
130 nodeDom = self.rootNode
132 elementName = nodeDom.nodeName
133 if elementName and not elementName.startswith("#"):
134 # attributes have tags and values. get {tag: value}, else {type: value}
135 node[elementName] = self._attributeDict(nodeDom)
136 # resolve the child nodes.
137 if nodeDom.hasChildNodes():
138 for child in nodeDom.childNodes:
139 childName = self._getName(child)
143 # initialize the possible array of children
144 if not node[elementName].has_key(childName):
145 node[elementName][childName] = []
146 # if child node has text child nodes
147 # append the children to the array as strings
148 if child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
149 for nextchild in child.childNodes:
150 node[elementName][childName].append(nextchild.data)
151 # convert element child node to dict
153 childdict = self.toDict(child)
154 for value in childdict.values():
155 node[elementName][childName].append(value)
156 #node[childName].append(self.toDict(child))
162 convert this rspec to an xml string and return it.
164 return self.rootNode.toxml()
def toprettyxml(self):
    """
    Return this rspec as an indented, human-readable xml string.

    Nothing is printed; the text produced by the root node's own
    toprettyxml() is handed back to the caller.
    """
    root = self.rootNode
    pretty = root.toprettyxml()
    return pretty
def parseFile(self, filename):
    """
    Read a local xml file and store its root element as self.rootNode.

    Args:
        filename: whatever minidom.parse() accepts, i.e. a path or an
                  open file object.
    """
    dom = minidom.parse(filename)
    # documentElement is always the root element.  childNodes[0] is not:
    # a leading comment, processing instruction or doctype comes first.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    Read an xml string and store its root element as self.rootNode.

    Args:
        xml: the xml document as a string.
    """
    # Newlines and tabs are dropped before parsing.
    # NOTE(review): this also removes them inside text content and between
    # attributes that are split over several lines -- confirm callers never
    # pass such documents.
    xml = xml.replace('\n', '').replace('\t', '').strip()
    dom = minidom.parseString(xml)
    # documentElement is always the root element.  childNodes[0] is not:
    # a leading comment, processing instruction or doctype comes first.
    self.rootNode = dom.documentElement
191 def _httpGetXSD(self, xsdURI):
192 # split the URI into relevant parts
193 host = xsdURI.split("/")[2]
194 if xsdURI.startswith("https"):
195 conn = httplib.HTTPSConnection(host,
196 httplib.HTTPSConnection.default_port)
197 elif xsdURI.startswith("http"):
198 conn = httplib.HTTPConnection(host,
199 httplib.HTTPConnection.default_port)
200 conn.request("GET", xsdURI)
201 # If we can't download the schema, raise an exception
202 r1 = conn.getresponse()
205 return r1.read().replace('\n', '').replace('\t', '').strip()
208 def _parseXSD(self, xsdURI):
210 Download XSD from URL, or if file, read local xsd file and set schemaDict
212 # Since the schema definition is a global namespace shared by and agreed upon by
213 # others, this should probably be a URL. Check for URL, download xsd, parse, or
214 # if local file, use local file.
216 if xsdURI.startswith("http"):
218 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
220 # logging.debug("%s: web file not found" % xsdURI)
221 # logging.debug("Using local file %s" % self.xsd")
223 print "Can't find %s on the web. Continuing." % xsdURI
225 if os.path.exists(xsdURI):
226 # logging.debug("using local copy.")
227 print "Using local %s" % xsdURI
228 schemaDom = minidom.parse(xsdURI)
230 raise Exception("Can't find xsd locally")
231 self.schemaDict = self.toDict(schemaDom.childNodes[0])
234 def dict2dom(self, rdict, include_doc = False):
236 convert a dict object into a dom object.
239 def elementNode(tagname, rd):
240 element = minidom.Element(tagname)
241 for key in rd.keys():
242 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
243 element.setAttribute(key, str(rd[key]))
244 elif isinstance(rd[key], dict):
245 child = elementNode(key, rd[key])
246 element.appendChild(child)
247 elif isinstance(rd[key], list):
249 if isinstance(item, dict):
250 child = elementNode(key, item)
251 element.appendChild(child)
252 elif isinstance(item, StringTypes) or isinstance(item, int):
253 child = minidom.Element(key)
254 text = minidom.Text()
256 child.appendChild(text)
257 element.appendChild(child)
260 # Minidom does not allow documents to have more than one
261 # child, but elements may have many children. Because of
262 # this, the document's root node will be the first key/value
263 # pair in the dictionary.
264 node = elementNode(rdict.keys()[0], rdict.values()[0])
266 rootNode = minidom.Document()
267 rootNode.appendChild(node)
def parseDict(self, rdict, include_doc = True):
    """
    Build a dom from the given dictionary and keep it on this object.

    The dictionary is handed to dict2dom(); the first child of whatever
    dict2dom() returns becomes self.rootNode.
    """
    converted = self.dict2dom(rdict, include_doc)
    first_child = converted.childNodes[0]
    self.rootNode = first_child
def getDictsByTagName(self, tagname, dom = None):
    """
    Find every element called ``tagname`` below ``dom`` and return the
    matches as a list of dicts.

    Each match is converted with toDict(); the values of the resulting
    dict (not the dict itself) are what end up in the returned list.
    When no dom is given the search starts at self.rootNode.
    """
    if not dom:
        dom = self.rootNode

    elements = dom.getElementsByTagName(tagname)
    converted = [self.toDict(element) for element in elements]
    return [value for entry in converted for value in entry.values()]
295 def getDictByTagNameValue(self, tagname, value, dom = None):
297 Search the dom for the first element with the specified tagname
298 and value and return it as a dict.
303 dicts = self.getDictsByTagName(tagname, dom)
306 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
    """
    Remove elements from the dom, starting at ``dom`` and descending
    through all of its children.

    An element is removed when all of the following hold:
        1. its local name equals ``tagname``
        2. it carries the attribute named ``attribute``
        3. the attribute value is in ``blacklist``, or ``whitelist`` is
           non-empty and the value is not in it

    Args:
        tagname:   local name of the elements to examine.
        attribute: name of the attribute whose value is tested.
        blacklist: values that cause removal; empty means "no blacklist".
        whitelist: values that are allowed to stay; empty means
                   "no whitelist".  Neither list is modified here.
        dom:       node to start from; defaults to self.rootNode.
    """
    if dom is None:
        dom = self.rootNode

    # Pass 1: walk the tree and only *collect* the rejected elements.
    # Removing while iterating a live childNodes list shifts the list and
    # makes the walk skip the sibling that follows every removed node.
    rejected = []
    pending = [dom]
    while pending:
        node = pending.pop()
        attrs = node.attributes
        if node.localName == tagname and attrs is not None:
            attr = attrs.get(attribute)
            if attr is not None:
                value = attr.value
                if (whitelist and value not in whitelist) or \
                   (blacklist and value in blacklist):
                    rejected.append(node)
                    # Its descendants leave the tree together with it.
                    continue
        pending.extend(node.childNodes)

    # Pass 2: detach.  Each node is removed exactly once, even when both
    # lists reject it, and a node without a parent is left alone.
    for node in rejected:
        parent = node.parentNode
        if parent is not None:
            parent.removeChild(node)
335 def merge(self, rspecs, tagname, dom=None):
337 Merge this rspec with the requested rspec based on the specified
338 starting tag name. The start tag (and all of its children) will be merged
347 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
348 if whitelist and dom.attributes.get(attribute).value not in whitelist:
349 dom.parentNode.removeChild(dom)
350 if blacklist and dom.attributes.get(attribute).value in blacklist:
351 dom.parentNode.removeChild(dom)
353 if dom.hasChildNodes():
354 for child in dom.childNodes:
355 self.filter(tagname, attribute, blacklist, whitelist, child)
357 def validateDicts(self):
def pprint(self, r = None, depth = 0):
    """
    Pretty print a (possibly nested) dict, one entry per line.

    Dict keys are printed followed by ':' and their values one level
    deeper; the items of lists and tuples are printed one level deeper
    as well; anything else is printed as is.  One space of indentation
    is used per level.

    Args:
        r:     the structure to print; defaults to self.dict.
        depth: indentation level to start at.
    """
    # Resolve the default once, here.  Doing it on every recursive call
    # would make a nested None value restart the dump at self.dict and
    # could recurse without end.
    if r is None:
        r = self.dict

    def _emit(value, level):
        indent = " " * level
        # check if it's nested
        if isinstance(value, dict):
            for key in value.keys():
                print(indent + "%s:" % key)
                _emit(value[key], level + 1)
        elif isinstance(value, (tuple, list)):
            for item in value:
                _emit(item, level + 1)
        # not nested so just print.
        else:
            print(indent + "%s" % value)

    _emit(r, depth)
388 class RecordSpec(RSpec):
391 def parseDict(self, rdict, include_doc = False):
393 Convert a dictionary into a dom object and store it.
395 self.rootNode = self.dict2dom(rdict, include_doc)
397 def dict2dom(self, rdict, include_doc = False):
399 if not len(rdict.keys()) == 1:
400 record_dict = {self.root_tag : rdict}
401 return RSpec.dict2dom(self, record_dict, include_doc)