8 from xml.dom import minidom
9 from types import StringTypes, ListType
10 from lxml import etree
11 from StringIO import StringIO
def merge_rspecs(rspecs):
    """
    Merge a list of RSpec XML strings into one RSpec and return the result.

    The first parsed document whose root element has type="SFA" becomes the
    base document.  For every later document of that type, deep copies of
    its ./network and ./request children are appended to the base root.
    Documents whose root has any other type are parsed but contribute
    nothing to the result.

    rspecs -- list of RSpec XML strings.  A falsy value, or a value that is
              not a list (for example a single RSpec string), is returned
              unchanged.

    Returns the merged document serialized with etree.tostring(), including
    an XML declaration and pretty printing.

    Raises InvalidRSpec when one of the strings is not well-formed XML.
    """
    if not rspecs or not isinstance(rspecs, list):
        return rspecs

    # Local import so the function does not rely on a module-level
    # "from copy import deepcopy".
    from copy import deepcopy

    rspec = None
    for tmp_rspec in rspecs:
        try:
            tree = etree.parse(StringIO(tmp_rspec))
        except etree.XMLSyntaxError as exc:
            # Name the offending input (the loop variable) in the message.
            message = str(tmp_rspec) + ": " + str(exc)
            raise InvalidRSpec(message)

        root = tree.getroot()
        if root.get("type") in ["SFA"]:
            if rspec is None:
                # First SFA document: use its root as the merge target.
                rspec = root
            else:
                # Copy the nodes so the source tree is left intact.
                for network in root.iterfind("./network"):
                    rspec.append(deepcopy(network))
                for request in root.iterfind("./request"):
                    rspec.append(deepcopy(request))

    # NOTE(review): rspec is still None here when no input had type="SFA";
    # confirm what callers expect in that case.
    return etree.tostring(rspec, xml_declaration=True, pretty_print=True)
45 def __init__(self, xml = None, xsd = None, NSURL = None):
47 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
48 and reads python dicts and writes rspec xml
50 self.xsd = # Schema. Can be local or remote file.
51 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
52 self.rootNode = # root of the DOM
53 self.dict = # dict of the RSpec.
54 self.schemaDict = {} # dict of the Schema
65 if type(xml) in StringTypes:
67 self.dict = self.toDict()
69 self._parseXSD(self.NSURL + self.xsd)
72 def _getText(self, nodelist):
75 if node.nodeType == node.TEXT_NODE:
# The rspec is composed of two parts and one kind of reference:
# attributes/elements describe individual resources
# complexTypes are used to describe a set of attributes/elements
# complexTypes can include a reference to other complexTypes.
85 def _getName(self, node):
86 '''Gets name of node. If tag has no name, then return tag's localName'''
88 if not node.nodeName.startswith("#"):
91 elif node.attributes.has_key("name"):
92 name = node.attributes.get("name").value
96 # Attribute. {name : nameofattribute, {items: values})
97 def _attributeDict(self, attributeDom):
98 '''Traverse single attribute node. Create a dict {attributename : {name: value,}]}'''
99 node = {} # parsed dict
100 for attr in attributeDom.attributes.keys():
101 node[attr] = attributeDom.attributes.get(attr).value
def appendToDictOrCreate(self, dict, key, value):
    """
    Append value to the list stored under key, creating the list if needed.

    dict  -- dictionary mapping keys to lists; it is modified in place
    key   -- key whose list receives the value
    value -- object to append

    Returns the same dictionary object that was passed in.
    """
    # setdefault() covers both the "key exists" and "key missing" cases
    # without dict.has_key(), which newer Python versions do not provide.
    dict.setdefault(key, []).append(value)
    return dict
112 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
114 convert an XML to a nested dict:
115 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
116 * Terminal nodes (the rest) are nested dictionaries
120 nodeDom=self.rootNode
122 curNodeName = nodeDom.localName
124 if (nodeDom.hasChildNodes()):
126 for attribute in nodeDom.attributes.keys():
127 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
128 for child in nodeDom.childNodes[:-1]:
129 if (child.nodeValue):
130 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
132 childdict = self.toGenDict(child, None, childdict, curNodeName)
134 child = nodeDom.childNodes[-1]
135 if (child.nodeValue):
136 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
138 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
140 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
143 for attribute in nodeDom.attributes.keys():
144 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
146 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
148 if (parentdict is not None):
149 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
def toDict(self, nodeDom = None):
    """
    Convert this rspec (or the given DOM node) to a dict and return it.

    nodeDom -- DOM node to convert; defaults to self.rootNode.

    The result has the form {nodeName: contents}, where contents starts as
    the dict built by self._attributeDict() for the node and gains one list
    per child name.  Each list collects, per child:
      * the text data, for children whose first child node is text
      * the values of the recursively converted dict, for other children
    A node whose name is empty or starts with "#" yields an empty dict.
    """
    node = {}
    if nodeDom is None:
        nodeDom = self.rootNode

    elementName = nodeDom.nodeName
    if not elementName or elementName.startswith("#"):
        return node

    # attributes have tags and values.  get {tag: value}, else {type: value}
    node[elementName] = self._attributeDict(nodeDom)
    contents = node[elementName]

    # resolve the child nodes.
    for child in nodeDom.childNodes:
        childName = self._getName(child)

        # children for which no name can be determined are skipped
        if not childName:
            continue

        # initialize the possible array of children
        if childName not in contents:
            contents[childName] = []

        if isinstance(child, minidom.Text):
            # add if data is not empty; the data lives on the child itself
            if child.data.strip():
                contents[childName].append(child.data)
        elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
            for nextchild in child.childNodes:
                contents[childName].append(nextchild.data)
        else:
            childdict = self.toDict(child)
            for value in childdict.values():
                contents[childName].append(value)

    return node
196 convert this rspec to an xml string and return it.
198 return self.rootNode.toxml()
def toprettyxml(self):
    """
    Return this rspec as an XML string in a pretty (indented) format.

    Delegates to the toprettyxml() method of self.rootNode with its default
    arguments; nothing is printed.
    """
    return self.rootNode.toprettyxml()
def __removeWhitespaceNodes(self, parent):
    """
    Recursively remove whitespace-only text nodes below parent.

    parent -- DOM node whose subtree is cleaned in place.
    """
    # Iterate over a copy: removeChild() mutates parent.childNodes.
    for child in list(parent.childNodes):
        if child.nodeType == minidom.Node.TEXT_NODE and child.data.strip() == '':
            parent.removeChild(child)
        else:
            self.__removeWhitespaceNodes(child)
def parseFile(self, filename):
    """
    Read a local xml file and store its root element as self.rootNode.

    filename -- passed unchanged to minidom.parse().

    Whitespace-only text nodes are stripped from the parsed document before
    the root element is stored.
    """
    dom = minidom.parse(filename)
    self.__removeWhitespaceNodes(dom)
    # documentElement is the root element even when a doctype, comment or
    # processing instruction precedes it; childNodes[0] would be that
    # leading node instead.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    Read an xml string and store its root element as self.rootNode.

    xml -- passed unchanged to minidom.parseString().

    Whitespace-only text nodes are stripped from the parsed document before
    the root element is stored.
    """
    dom = minidom.parseString(xml)
    self.__removeWhitespaceNodes(dom)
    # documentElement is the root element even when a doctype, comment or
    # processing instruction precedes it; childNodes[0] would be that
    # leading node instead.
    self.rootNode = dom.documentElement
233 def _httpGetXSD(self, xsdURI):
234 # split the URI into relevant parts
235 host = xsdURI.split("/")[2]
236 if xsdURI.startswith("https"):
237 conn = httplib.HTTPSConnection(host,
238 httplib.HTTPSConnection.default_port)
239 elif xsdURI.startswith("http"):
240 conn = httplib.HTTPConnection(host,
241 httplib.HTTPConnection.default_port)
242 conn.request("GET", xsdURI)
243 # If we can't download the schema, raise an exception
244 r1 = conn.getresponse()
247 return r1.read().replace('\n', '').replace('\t', '').strip()
250 def _parseXSD(self, xsdURI):
252 Download XSD from URL, or if file, read local xsd file and set
Since the schema definition is a global namespace shared by and
agreed upon by others, this should probably be a URL.  Check
for URL, download xsd, parse, or if local file, use that.
260 if xsdURI.startswith("http"):
262 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
264 # logging.debug("%s: web file not found" % xsdURI)
265 # logging.debug("Using local file %s" % self.xsd")
267 print "Can't find %s on the web. Continuing." % xsdURI
269 if os.path.exists(xsdURI):
270 # logging.debug("using local copy.")
271 print "Using local %s" % xsdURI
272 schemaDom = minidom.parse(xsdURI)
274 raise Exception("Can't find xsd locally")
275 self.schemaDict = self.toDict(schemaDom.childNodes[0])
278 def dict2dom(self, rdict, include_doc = False):
280 convert a dict object into a dom object.
283 def elementNode(tagname, rd):
284 element = minidom.Element(tagname)
285 for key in rd.keys():
286 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
287 element.setAttribute(key, str(rd[key]))
288 elif isinstance(rd[key], dict):
289 child = elementNode(key, rd[key])
290 element.appendChild(child)
291 elif isinstance(rd[key], list):
293 if isinstance(item, dict):
294 child = elementNode(key, item)
295 element.appendChild(child)
296 elif isinstance(item, StringTypes) or isinstance(item, int):
297 child = minidom.Element(key)
298 text = minidom.Text()
300 child.appendChild(text)
301 element.appendChild(child)
# Minidom does not allow documents to have more than one
# child, but elements may have many children. Because of
# this, the document's root node will be the first key/value
# pair in the dictionary.
308 node = elementNode(rdict.keys()[0], rdict.values()[0])
310 rootNode = minidom.Document()
311 rootNode.appendChild(node)
def parseDict(self, rdict, include_doc = True):
    """
    Convert a dictionary into a dom object and store it.

    rdict       -- dictionary forwarded unchanged to self.dict2dom()
    include_doc -- flag forwarded unchanged to self.dict2dom()

    The first child of the node returned by dict2dom() is stored as
    self.rootNode.
    """
    # NOTE(review): assumes dict2dom() returns a node whose first child is
    # the intended root -- confirm this also holds for include_doc=False.
    self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
324 def getDictsByTagName(self, tagname, dom = None):
326 Search the dom for all elements with the specified tagname
327 and return them as a list of dicts
332 doms = dom.getElementsByTagName(tagname)
333 dictlist = [self.toDict(d) for d in doms]
334 for item in dictlist:
335 for value in item.values():
339 def getDictByTagNameValue(self, tagname, value, dom = None):
341 Search the dom for the first element with the specified tagname
342 and value and return it as a dict.
347 dicts = self.getDictsByTagName(tagname, dom)
350 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
    """
    Remove from the DOM all elements where:
    1. tagname matches the element tag
    2. the element carries the named attribute
    3. the attribute value is rejected, i.e. it is missing from a non-empty
       whitelist or present in a non-empty blacklist

    tagname   -- local name of the elements to examine
    attribute -- name of the attribute whose value is checked
    blacklist -- values to reject; ignored when empty (never modified here)
    whitelist -- the only values to keep; ignored when empty (never
                 modified here)
    dom       -- node to start from; defaults to self.rootNode

    The tree is modified in place; nothing is returned.
    """
    if dom is None:
        dom = self.rootNode

    # Only element nodes have attributes to inspect.
    if (dom.nodeType == minidom.Node.ELEMENT_NODE
            and dom.localName == tagname and dom.hasAttribute(attribute)):
        value = dom.getAttribute(attribute)
        rejected = ((whitelist and value not in whitelist)
                    or (blacklist and value in blacklist))
        if rejected:
            # Remove exactly once, even when both lists reject the value;
            # a second removal would find parentNode already cleared.
            dom.parentNode.removeChild(dom)
            # The subtree is detached now, so there is nothing left to scan.
            return

    # Iterate over a copy: removing a child from the live childNodes list
    # while looping over it would skip the following sibling.
    for child in list(dom.childNodes):
        self.filter(tagname, attribute, blacklist, whitelist, child)
379 def merge(self, rspecs, tagname, dom=None):
381 Merge this rspec with the requested rspec based on the specified
382 starting tag name. The start tag (and all of its children) will be merged
391 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
392 if whitelist and dom.attributes.get(attribute).value not in whitelist:
393 dom.parentNode.removeChild(dom)
394 if blacklist and dom.attributes.get(attribute).value in blacklist:
395 dom.parentNode.removeChild(dom)
397 if dom.hasChildNodes():
398 for child in dom.childNodes:
399 self.filter(tagname, attribute, blacklist, whitelist, child)
401 def validateDicts(self):
411 def pprint(self, r = None, depth = 0):
413 Pretty print the dict
416 if r == None: r = self.dict
418 for tab in range(0,depth): line += " "
419 # check if it's nested
422 print line + "%s:" % i
423 self.pprint(r[i], depth + 1)
424 elif type(r) in (tuple, list):
425 for j in r: self.pprint(j, depth + 1)
426 # not nested so just print.
428 print line + "%s" % r
432 class RecordSpec(RSpec):
435 def parseDict(self, rdict, include_doc = False):
437 Convert a dictionary into a dom object and store it.
439 self.rootNode = self.dict2dom(rdict, include_doc)
441 def dict2dom(self, rdict, include_doc = False):
443 if not len(rdict.keys()) == 1:
444 record_dict = {self.root_tag : rdict}
445 return RSpec.dict2dom(self, record_dict, include_doc)