1 ### $Id: rspec.py 18557 2010-08-03 19:18:39Z tmack $
2 ### $URL: http://svn.planet-lab.org/svn/sfa/trunk/sfa/util/rspec.py $
8 from xml.dom import minidom
9 from types import StringTypes, ListType
10 from lxml import etree
11 from StringIO import StringIO
16 def __init__(self, xml = None, xsd = None, NSURL = None):
18 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
19 and reads python dicts and writes rspec xml
21 self.xsd = # Schema. Can be local or remote file.
22 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
23 self.rootNode = # root of the DOM
24 self.dict = # dict of the RSpec.
25 self.schemaDict = {} # dict of the Schema
36 if type(xml) in StringTypes:
38 self.dict = self.toDict()
40 self._parseXSD(self.NSURL + self.xsd)
def _getText(self, nodelist):
    """
    Return the concatenated character data of the text nodes in nodelist.

    nodelist is any iterable of DOM nodes (for example an element's
    childNodes). Nodes that are not TEXT_NODE are ignored. An empty
    string is returned when nodelist holds no text nodes.
    """
    # NOTE(review): only the TEXT_NODE test of the original body was
    # available; the accumulate-and-return shape follows the standard
    # minidom getText idiom -- confirm against the repository copy.
    return "".join([node.data for node in nodelist
                    if node.nodeType == node.TEXT_NODE])
50 # The rspec is comprised of 2 parts, and 1 reference:
51 # attributes/elements describe individual resources
52 # complexTypes are used to describe a set of attributes/elements
53 # complexTypes can include a reference to other complexTypes.
def _getName(self, node):
    """
    Return a name for node, or None when it has none.

    Nodes whose nodeName starts with "#" (text, comment, document, ...)
    are unnamed. Otherwise the node's localName is used when set, and the
    value of its "name" attribute is the fallback.
    """
    if node.nodeName.startswith("#"):
        return None

    # NOTE(review): the localName-first branch is reconstructed from the
    # surviving if/elif structure -- confirm against the repository copy.
    if node.localName:
        return node.localName

    # Non-element nodes (e.g. processing instructions) have attributes set
    # to None; treat them as unnamed instead of raising AttributeError.
    attributes = node.attributes
    if attributes is None:
        return None

    named = attributes.get("name")
    if named is None:
        return None
    return named.value
67 # Attribute. {name : nameofattribute, {items: values})
def _attributeDict(self, attributeDom):
    """
    Return the XML attributes of a single DOM node as a plain dict.

    The result maps each attribute name to its string value, e.g.
    <node name="n1" cpu="4"/> gives {"name": "n1", "cpu": "4"}.
    A node without attributes yields an empty dict.
    """
    attributes = attributeDom.attributes
    # Only elements carry a NamedNodeMap; other node types expose None.
    if attributes is None:
        return {}
    # NamedNodeMap.items() yields (attribute name, attribute value) pairs.
    return dict(attributes.items())
def appendToDictOrCreate(self, dict, key, value):
    """
    Append value to the list stored under key, creating the list if needed.

    dict is modified in place and also returned, because callers rebind
    their variable to the result. (The parameter name shadows the builtin
    but is kept for interface compatibility.)
    """
    # setdefault covers both cases: an existing list is reused, a missing
    # key gets a fresh list before the append.
    dict.setdefault(key, []).append(value)
    return dict
83 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
85 convert an XML to a nested dict:
86 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
87 * Terminal nodes (the rest) are nested dictionaries
93 curNodeName = nodeDom.localName
95 if (nodeDom.hasChildNodes()):
97 for attribute in nodeDom.attributes.keys():
98 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
99 for child in nodeDom.childNodes[:-1]:
100 if (child.nodeValue):
101 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
103 childdict = self.toGenDict(child, None, childdict, curNodeName)
105 child = nodeDom.childNodes[-1]
106 if (child.nodeValue):
107 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
109 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
111 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
114 for attribute in nodeDom.attributes.keys():
115 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
117 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
119 if (parentdict is not None):
120 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
def toDict(self, nodeDom = None):
    """
    convert this rspec to a dict and return it.

    nodeDom is the DOM node to convert; self.rootNode is used when it is
    omitted. The result has the shape {nodeName: entry}, where entry
    holds the node's attributes (name -> value) plus, for every named
    child, a list under the child's name containing either the child's
    text data or the converted dict of the child. Nodes whose nodeName is
    empty or starts with "#" convert to an empty dict.
    """
    node = {}
    if nodeDom is None:
        nodeDom = self.rootNode

    elementName = nodeDom.nodeName
    if not elementName or elementName.startswith("#"):
        return node

    # attributes have tags and values. get {tag: value}, else {type: value}
    entry = self._attributeDict(nodeDom)
    node[elementName] = entry

    # resolve the child nodes.
    for child in nodeDom.childNodes:
        childName = self._getName(child)
        if not childName:
            continue

        # initialize the possible array of children
        values = entry.setdefault(childName, [])

        if isinstance(child, minidom.Text):
            # add if data is not empty. The original appended
            # nextchild.data here, a name that is unbound (or left over
            # from an earlier sibling) at this point.
            if child.data.strip():
                values.append(child.data)
        elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
            # NOTE(review): assumes every child of a text-led element is
            # character data; mixed content would fail on .data.
            for nextchild in child.childNodes:
                values.append(nextchild.data)
        else:
            for value in self.toDict(child).values():
                values.append(value)

    return node
167 convert this rspec to an xml string and return it.
169 return self.rootNode.toxml()
def toprettyxml(self):
    """
    Return this rspec serialized as indented, human-readable xml.
    """
    root = self.rootNode
    pretty = root.toprettyxml()
    return pretty
def __removeWhitespaceNodes(self, parent):
    """
    Strip whitespace-only text nodes from the subtree rooted at parent.

    The tree is modified in place; nothing is returned.
    """
    # Walk a snapshot: removeChild mutates parent.childNodes.
    for node in tuple(parent.childNodes):
        blank_text = (node.nodeType == minidom.Node.TEXT_NODE
                      and node.data.strip() == '')
        if blank_text:
            parent.removeChild(node)
            continue
        self.__removeWhitespaceNodes(node)
def parseFile(self, filename):
    """
    read a local xml file and store it as a dom object.

    filename is handed to minidom.parse. Whitespace-only text nodes are
    stripped from the parsed tree, and the document's root element is
    stored in self.rootNode.
    """
    dom = minidom.parse(filename)
    self.__removeWhitespaceNodes(dom)
    # documentElement is always the root element; childNodes[0] can be a
    # comment, processing instruction or doctype that precedes it.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    read an xml string and store it as a dom object.

    xml is handed to minidom.parseString. Whitespace-only text nodes are
    stripped from the parsed tree, and the document's root element is
    stored in self.rootNode.
    """
    dom = minidom.parseString(xml)
    self.__removeWhitespaceNodes(dom)
    # documentElement is always the root element; childNodes[0] can be a
    # comment, processing instruction or doctype that precedes it.
    self.rootNode = dom.documentElement
def _httpGetXSD(self, xsdURI):
    """
    Download the schema at xsdURI over HTTP(S) and return its text.

    Newlines and tabs are removed from the response body and surrounding
    whitespace is stripped before it is returned.

    Raises Exception when the URI is not http/https or when the server
    does not answer with status 200.
    """
    # split the URI into relevant parts
    host = xsdURI.split("/")[2]
    # The port is left to httplib: it parses an explicit "host:port" and
    # otherwise falls back to the scheme's default port.
    if xsdURI.startswith("https"):
        conn = httplib.HTTPSConnection(host)
    elif xsdURI.startswith("http"):
        conn = httplib.HTTPConnection(host)
    else:
        raise Exception("%s: unsupported URI scheme" % xsdURI)

    try:
        conn.request("GET", xsdURI)
        # If we can't download the schema, raise an exception
        r1 = conn.getresponse()
        # NOTE(review): the original status test was not available; 200
        # is assumed to be the only accepted status -- confirm.
        if r1.status != 200:
            raise Exception("%s: HTTP %s %s" % (xsdURI, r1.status, r1.reason))
        return r1.read().replace('\n', '').replace('\t', '').strip()
    finally:
        # Release the socket on success and on failure.
        conn.close()
221 def _parseXSD(self, xsdURI):
223 Download XSD from URL, or if file, read local xsd file and set
226 Since the schema definiton is a global namespace shared by and
227 agreed upon by others, this should probably be a URL. Check
228 for URL, download xsd, parse, or if local file, use that.
231 if xsdURI.startswith("http"):
233 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
235 # logging.debug("%s: web file not found" % xsdURI)
236 # logging.debug("Using local file %s" % self.xsd")
238 print "Can't find %s on the web. Continuing." % xsdURI
240 if os.path.exists(xsdURI):
241 # logging.debug("using local copy.")
242 print "Using local %s" % xsdURI
243 schemaDom = minidom.parse(xsdURI)
245 raise Exception("Can't find xsd locally")
246 self.schemaDict = self.toDict(schemaDom.childNodes[0])
249 def dict2dom(self, rdict, include_doc = False):
251 convert a dict object into a dom object.
254 def elementNode(tagname, rd):
255 element = minidom.Element(tagname)
256 for key in rd.keys():
257 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
258 element.setAttribute(key, str(rd[key]))
259 elif isinstance(rd[key], dict):
260 child = elementNode(key, rd[key])
261 element.appendChild(child)
262 elif isinstance(rd[key], list):
264 if isinstance(item, dict):
265 child = elementNode(key, item)
266 element.appendChild(child)
267 elif isinstance(item, StringTypes) or isinstance(item, int):
268 child = minidom.Element(key)
269 text = minidom.Text()
271 child.appendChild(text)
272 element.appendChild(child)
275 # Minidom does not allow documents to have more then one
276 # child, but elements may have many children. Because of
277 # this, the document's root node will be the first key/value
278 # pair in the dictionary.
279 node = elementNode(rdict.keys()[0], rdict.values()[0])
281 rootNode = minidom.Document()
282 rootNode.appendChild(node)
def parseDict(self, rdict, include_doc = True):
    """
    Build a dom from the dictionary rdict and keep it as this rspec's root.

    The conversion is delegated to self.dict2dom; the first child of the
    object it returns is stored in self.rootNode.
    """
    converted = self.dict2dom(rdict, include_doc)
    first_child = converted.childNodes[0]
    self.rootNode = first_child
295 def getDictsByTagName(self, tagname, dom = None):
297 Search the dom for all elements with the specified tagname
298 and return them as a list of dicts
303 doms = dom.getElementsByTagName(tagname)
304 dictlist = [self.toDict(d) for d in doms]
305 for item in dictlist:
306 for value in item.values():
310 def getDictByTagNameValue(self, tagname, value, dom = None):
312 Search the dom for the first element with the specified tagname
313 and value and return it as a dict.
318 dicts = self.getDictsByTagName(tagname, dom)
321 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
    """
    Removes all elements where:
    1. tagname matches the element tag
    2. attribute matches the element attribute
    3. the attribute value is rejected: it is missing from a non-empty
       whitelist, or present in a non-empty blacklist

    The search starts at dom (self.rootNode when omitted) and descends
    through all children. Matching elements are detached from their
    parent in place; nothing is returned.
    """
    # NOTE(review): defaulting dom to self.rootNode is reconstructed from
    # the dom=None default -- confirm against the repository copy.
    if dom is None:
        dom = self.rootNode

    if dom.localName == tagname:
        attr = dom.attributes.get(attribute)
        if attr is not None:
            value = attr.value
            rejected = ((whitelist and value not in whitelist) or
                        (blacklist and value in blacklist))
            if rejected:
                # Remove once and stop: a second removeChild would fail
                # because parentNode is cleared by the first one.
                dom.parentNode.removeChild(dom)
                return

    # Iterate over a copy: recursive calls may remove children, and
    # walking the live list would skip the sibling after each removal.
    for child in list(dom.childNodes):
        self.filter(tagname, attribute, blacklist, whitelist, child)
350 def merge(self, rspecs, tagname, dom=None):
352 Merge this rspec with the requested rspec based on the specified
353 starting tag name. The start tag (and all of its children) will be merged
362 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
363 if whitelist and dom.attributes.get(attribute).value not in whitelist:
364 dom.parentNode.removeChild(dom)
365 if blacklist and dom.attributes.get(attribute).value in blacklist:
366 dom.parentNode.removeChild(dom)
368 if dom.hasChildNodes():
369 for child in dom.childNodes:
370 self.filter(tagname, attribute, blacklist, whitelist, child)
372 def validateDicts(self):
382 def pprint(self, r = None, depth = 0):
384 Pretty print the dict
387 if r == None: r = self.dict
389 for tab in range(0,depth): line += " "
390 # check if it's nested
393 print line + "%s:" % i
394 self.pprint(r[i], depth + 1)
395 elif type(r) in (tuple, list):
396 for j in r: self.pprint(j, depth + 1)
397 # not nested so just print.
399 print line + "%s" % r
403 class RecordSpec(RSpec):
def parseDict(self, rdict, include_doc = False):
    """
    Build a dom from the dictionary rdict and keep it as this record's root.

    The object returned by self.dict2dom is stored in self.rootNode as is.
    """
    converted = self.dict2dom(rdict, include_doc)
    self.rootNode = converted
412 def dict2dom(self, rdict, include_doc = False):
414 if not len(rdict.keys()) == 1:
415 record_dict = {self.root_tag : rdict}
416 return RSpec.dict2dom(self, record_dict, include_doc)