8 from xml.dom import minidom
9 from types import StringTypes, ListType
10 from lxml import etree
11 from StringIO import StringIO
13 from sfa.util.sfalogging import sfa_logger
17 def __init__(self, xml = None, xsd = None, NSURL = None):
19 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
20 and reads python dicts and writes rspec xml
22 self.xsd = # Schema. Can be local or remote file.
23 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
24 self.rootNode = # root of the DOM
25 self.dict = # dict of the RSpec.
26 self.schemaDict = {} # dict of the Schema
37 if type(xml) in StringTypes:
39 self.dict = self.toDict()
41 self._parseXSD(self.NSURL + self.xsd)
def _getText(self, nodelist):
    """
    Return the concatenated character data of the text nodes in *nodelist*.

    nodelist -- an iterable of DOM nodes (e.g. an element's childNodes).
                Nodes whose nodeType is not TEXT_NODE are ignored.

    Returns a string; empty when there are no text nodes.
    """
    pieces = []
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            pieces.append(node.data)
    # join once instead of growing a string inside the loop
    return "".join(pieces)
51 # The rspec is comprised of 2 parts, and 1 reference:
52 # attributes/elements describe individual resources
53 # complexTypes are used to describe a set of attributes/elements
54 # complexTypes can include a reference to other complexTypes.
57 def _getName(self, node):
58 '''Gets name of node. If tag has no name, then return tag's localName'''
60 if not node.nodeName.startswith("#"):
63 elif node.attributes.has_key("name"):
64 name = node.attributes.get("name").value
68 # Attribute. {name : nameofattribute, {items: values})
def _attributeDict(self, attributeDom):
    """
    Collect the XML attributes of a single DOM node into a flat dict.

    attributeDom -- a DOM node; its ``attributes`` map is read.

    Returns {attribute name: attribute value}. The dict is empty when the
    node has no attributes, or is a node kind (text, comment, ...) whose
    ``attributes`` is None.
    """
    attributes = attributeDom.attributes
    if attributes is None:
        return {}
    # NamedNodeMap.items() yields (name, value) pairs
    return dict(attributes.items())
def appendToDictOrCreate(self, dict, key, value):
    """
    Append *value* to the list stored under *key*, creating the list first
    when *key* is not present yet.

    dict  -- the mapping to update in place (the parameter name shadows the
             builtin; it is kept so existing keyword callers keep working)
    key   -- key whose list receives the value
    value -- object to append

    Returns the same mapping, so callers can write
    ``d = self.appendToDictOrCreate(d, k, v)``.
    """
    dict.setdefault(key, []).append(value)
    return dict
84 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
86 convert an XML to a nested dict:
87 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
88 * Terminal nodes (the rest) are nested dictionaries
94 curNodeName = nodeDom.localName
96 if (nodeDom.hasChildNodes()):
98 for attribute in nodeDom.attributes.keys():
99 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
100 for child in nodeDom.childNodes[:-1]:
101 if (child.nodeValue):
102 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
104 childdict = self.toGenDict(child, None, childdict, curNodeName)
106 child = nodeDom.childNodes[-1]
107 if (child.nodeValue):
108 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
110 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
112 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
115 for attribute in nodeDom.attributes.keys():
116 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
118 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
120 if (parentdict is not None):
121 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
def toDict(self, nodeDom = None):
    """
    convert this rspec to a dict and return it.

    nodeDom -- DOM node to convert; defaults to self.rootNode.

    The result has the shape {nodeName: {...}} where the inner dict holds
    the node's attributes (from self._attributeDict) plus, for every child
    for which self._getName returns a name, a list of that child's values:
    its text data when the child only wraps text, otherwise the values of
    the child's own toDict() result.

    Nodes whose nodeName is empty or starts with "#" (text, comment,
    document nodes) produce an empty dict.
    """
    node = {}
    if nodeDom is None:
        nodeDom = self.rootNode

    elementName = nodeDom.nodeName
    if not elementName or elementName.startswith("#"):
        return node

    # attributes have tags and values. get {tag: value}, else {type: value}
    entry = self._attributeDict(nodeDom)
    node[elementName] = entry

    # resolve the child nodes.
    for child in nodeDom.childNodes:
        childName = self._getName(child)
        if not childName:
            continue

        # initialize the possible array of children
        values = entry.setdefault(childName, [])

        if isinstance(child, minidom.Text):
            # add if data is not empty
            if child.data.strip():
                # was `nextchild.data`: nextchild is not bound in this branch
                values.append(child.data)
        elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
            # NOTE(review): assumes every child of a text-leading element is
            # itself text; an element sibling has no .data -- confirm.
            for nextchild in child.childNodes:
                values.append(nextchild.data)
        else:
            childdict = self.toDict(child)
            for value in childdict.values():
                values.append(value)

    return node
168 convert this rspec to an xml string and return it.
170 return self.rootNode.toxml()
def toprettyxml(self):
    """
    Return this rspec as indented, human-readable xml.

    The string is produced by the root node's own toprettyxml(); nothing is
    written to stdout.
    """
    root = self.rootNode
    return root.toprettyxml()
def __removeWhitespaceNodes(self, parent):
    """
    Strip whitespace-only text nodes from *parent* and from every node
    below it. The tree is modified in place; nothing is returned.
    """
    # Walk a snapshot: removeChild() mutates parent.childNodes underneath us.
    for node in tuple(parent.childNodes):
        blank_text = (node.nodeType == minidom.Node.TEXT_NODE
                      and node.data.strip() == '')
        if blank_text:
            parent.removeChild(node)
            continue
        self.__removeWhitespaceNodes(node)
def parseFile(self, filename):
    """
    read a local xml file and store it as a dom object.

    filename -- path (or file object) accepted by minidom.parse.

    Whitespace-only text nodes are stripped, then the document's root
    element is stored in self.rootNode.
    """
    dom = minidom.parse(filename)
    self.__removeWhitespaceNodes(dom)
    # documentElement rather than childNodes[0]: a leading comment,
    # processing instruction or DOCTYPE would otherwise become the "root".
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    read an xml string and store it as a dom object.

    xml -- the xml document as a string.

    Whitespace-only text nodes are stripped, then the document's root
    element is stored in self.rootNode.
    """
    dom = minidom.parseString(xml)
    self.__removeWhitespaceNodes(dom)
    # documentElement rather than childNodes[0]: a leading comment,
    # processing instruction or DOCTYPE would otherwise become the "root".
    self.rootNode = dom.documentElement
def _httpGetXSD(self, xsdURI):
    """
    Download the schema at *xsdURI* and return it as a single-line string.

    xsdURI -- absolute http:// or https:// URI of the schema.

    Returns the response body with newlines and tabs removed and
    surrounding whitespace stripped.

    Raises Exception when the URI is not http(s) or when the server does
    not answer with status 200.
    """
    if xsdURI.startswith("https"):
        connection_class = httplib.HTTPSConnection
    elif xsdURI.startswith("http"):
        connection_class = httplib.HTTPConnection
    else:
        # previously `conn` was simply left unbound for such URIs
        raise Exception("rspec._httpGetXSD: not an http(s) URI: %s" % xsdURI)

    # split the URI into relevant parts
    host = xsdURI.split("/")[2]
    # No explicit port: httplib then honours a "host:port" authority and
    # otherwise falls back to the scheme's default port.
    conn = connection_class(host)
    try:
        conn.request("GET", xsdURI)
        # If we can't download the schema, raise an exception
        r1 = conn.getresponse()
        if r1.status != 200:
            raise Exception("rspec._httpGetXSD: %s returned HTTP %s"
                            % (xsdURI, r1.status))
        return r1.read().replace('\n', '').replace('\t', '').strip()
    finally:
        # always release the socket, also on failure
        conn.close()
222 def _parseXSD(self, xsdURI):
224 Download XSD from URL, or if file, read local xsd file and set
227 Since the schema definiton is a global namespace shared by and
228 agreed upon by others, this should probably be a URL. Check
229 for URL, download xsd, parse, or if local file, use that.
232 if xsdURI.startswith("http"):
234 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
236 # logging.debug("%s: web file not found" % xsdURI)
237 # logging.debug("Using local file %s" % self.xsd")
238 sfa_logger().log_exc("rspec.parseXSD: can't find %s on the web. Continuing." % xsdURI)
240 if os.path.exists(xsdURI):
241 # logging.debug("using local copy.")
242 sfa_logger().debug("rspec.parseXSD: Using local %s" % xsdURI)
243 schemaDom = minidom.parse(xsdURI)
245 raise Exception("rspec.parseXSD: can't find xsd locally")
246 self.schemaDict = self.toDict(schemaDom.childNodes[0])
249 def dict2dom(self, rdict, include_doc = False):
251 convert a dict object into a dom object.
254 def elementNode(tagname, rd):
255 element = minidom.Element(tagname)
256 for key in rd.keys():
257 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
258 element.setAttribute(key, str(rd[key]))
259 elif isinstance(rd[key], dict):
260 child = elementNode(key, rd[key])
261 element.appendChild(child)
262 elif isinstance(rd[key], list):
264 if isinstance(item, dict):
265 child = elementNode(key, item)
266 element.appendChild(child)
267 elif isinstance(item, StringTypes) or isinstance(item, int):
268 child = minidom.Element(key)
269 text = minidom.Text()
271 child.appendChild(text)
272 element.appendChild(child)
275 # Minidom does not allow documents to have more then one
276 # child, but elements may have many children. Because of
277 # this, the document's root node will be the first key/value
278 # pair in the dictionary.
279 node = elementNode(rdict.keys()[0], rdict.values()[0])
281 rootNode = minidom.Document()
282 rootNode.appendChild(node)
def parseDict(self, rdict, include_doc = True):
    """
    Build a dom from *rdict* with self.dict2dom and keep the first child of
    the result as self.rootNode.
    """
    built = self.dict2dom(rdict, include_doc)
    first_child = built.childNodes[0]
    self.rootNode = first_child
295 def getDictsByTagName(self, tagname, dom = None):
297 Search the dom for all elements with the specified tagname
298 and return them as a list of dicts
303 doms = dom.getElementsByTagName(tagname)
304 dictlist = [self.toDict(d) for d in doms]
305 for item in dictlist:
306 for value in item.values():
310 def getDictByTagNameValue(self, tagname, value, dom = None):
312 Search the dom for the first element with the specified tagname
313 and value and return it as a dict.
318 dicts = self.getDictsByTagName(tagname, dom)
321 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
    """
    Remove elements from the dom, in place.

    An element is removed when all of the following hold:
    1. tagname matches the element tag
    2. the element carries the named attribute
    3. the attribute value is listed in blacklist, or whitelist is
       non-empty and the value is not listed in it

    dom -- node to start from; defaults to self.rootNode. The whole
           subtree below it is visited.
    """
    if dom is None:
        dom = self.rootNode

    if (dom.nodeType == dom.ELEMENT_NODE
            and dom.localName == tagname
            and dom.hasAttribute(attribute)):
        value = dom.getAttribute(attribute)
        rejected = bool((whitelist and value not in whitelist)
                        or (blacklist and value in blacklist))
        # Remove once only: a node failing both lists used to be removed
        # twice, and the second removeChild() raised.
        if rejected and dom.parentNode is not None:
            dom.parentNode.removeChild(dom)

    # Iterate over a copy: the recursive call may remove `child` from
    # dom.childNodes, which made the live iteration skip the next sibling.
    for child in list(dom.childNodes):
        self.filter(tagname, attribute, blacklist, whitelist, child)
350 def merge(self, rspecs, tagname, dom=None):
352 Merge this rspec with the requested rspec based on the specified
353 starting tag name. The start tag (and all of its children) will be merged
362 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
363 if whitelist and dom.attributes.get(attribute).value not in whitelist:
364 dom.parentNode.removeChild(dom)
365 if blacklist and dom.attributes.get(attribute).value in blacklist:
366 dom.parentNode.removeChild(dom)
368 if dom.hasChildNodes():
369 for child in dom.childNodes:
370 self.filter(tagname, attribute, blacklist, whitelist, child)
372 def validateDicts(self):
def pprint(self, r = None, depth = 0):
    """
    Pretty print the dict

    r     -- value to print; when None, self.dict is printed.
    depth -- starting indentation level (one space per level).

    Dicts print each key followed by ':' and then the value one level
    deeper; tuples and lists print each item one level deeper; anything
    else is printed with '%s'.
    """
    if r is None:
        r = self.dict

    def emit(value, level):
        # Local recursion, so a None found *inside* the structure is printed
        # as a value instead of restarting from self.dict (which recursed
        # without end).
        line = " " * level
        # check if it's nested
        if isinstance(value, dict):
            for i in value.keys():
                print(line + "%s:" % i)
                emit(value[i], level + 1)
        elif isinstance(value, (tuple, list)):
            for j in value:
                emit(j, level + 1)
        # not nested so just print.
        else:
            print(line + "%s" % value)

    emit(r, depth)
class RecordSpec(RSpec):
    """
    RSpec variant whose dom is built from a record dict.

    Differences from RSpec visible here: parseDict stores the dict2dom
    result directly and defaults include_doc to False, and dict2dom wraps
    dicts that do not have exactly one top-level key under self.root_tag.
    """
    # NOTE(review): dict2dom reads self.root_tag, which is not defined in the
    # visible part of this class; it must exist as a class or instance
    # attribute -- confirm.

    def parseDict(self, rdict, include_doc = False):
        """
        Convert a dictionary into a dom object and store it.
        """
        self.rootNode = self.dict2dom(rdict, include_doc)

    def dict2dom(self, rdict, include_doc = False):
        """
        Convert *rdict* into a dom via RSpec.dict2dom.

        A dict with exactly one key is used as-is (that key becomes the
        root tag); any other dict is wrapped as {self.root_tag: rdict}.
        """
        if len(rdict) == 1:
            # previously record_dict was left unbound in this case
            record_dict = rdict
        else:
            record_dict = {self.root_tag : rdict}
        return RSpec.dict2dom(self, record_dict, include_doc)