4 from StringIO import StringIO
5 from types import StringTypes, ListType
7 from xml.dom import minidom
10 from sfa.util.sfalogging import info_logger
14 def __init__(self, xml = None, xsd = None, NSURL = None):
16 Class to manipulate RSpecs. Reads and parses rspec xml into python dicts
17 and reads python dicts and writes rspec xml
19 self.xsd = # Schema. Can be local or remote file.
20 self.NSURL = # If schema is remote, Name Space URL to query (full path minus filename)
21 self.rootNode = # root of the DOM
22 self.dict = # dict of the RSpec.
23 self.schemaDict = {} # dict of the Schema
34 if type(xml) in StringTypes:
36 self.dict = self.toDict()
38 self._parseXSD(self.NSURL + self.xsd)
41 def _getText(self, nodelist):
44 if node.nodeType == node.TEXT_NODE:
48 # The rspec is composed of 2 parts and 1 reference:
49 # attributes/elements describe individual resources
50 # complexTypes are used to describe a set of attributes/elements
51 # complexTypes can include a reference to other complexTypes.
54 def _getName(self, node):
55 '''Gets name of node. If tag has no name, then return tag's localName'''
57 if not node.nodeName.startswith("#"):
60 elif node.attributes.has_key("name"):
61 name = node.attributes.get("name").value
65 # Attribute. {name : nameofattribute, {items: values})
66 def _attributeDict(self, attributeDom):
67 '''Traverse a single node's attributes. Create a dict {attributename: value}'''
68 node = {} # parsed dict
69 for attr in attributeDom.attributes.keys():
70 node[attr] = attributeDom.attributes.get(attr).value
def appendToDictOrCreate(self, dict, key, value):
    """
    Append value to the list stored under key, creating the list first
    when the key is not present yet.

    dict  -- mapping of key -> list; it is modified in place.
    key   -- entry whose list receives the value.
    value -- item appended to dict[key].

    Returns the same mapping object, so callers may rebind the result.
    """
    # setdefault() replaces the deprecated has_key() test-then-append pair
    # with a single lookup and behaves the same on Python 2 and 3.
    dict.setdefault(key, []).append(value)
    return dict
81 def toGenDict(self, nodeDom=None, parentdict=None, siblingdict={}, parent=None):
83 convert an XML to a nested dict:
84 * Non-terminal nodes (elements with string children and attributes) are simple dictionaries
85 * Terminal nodes (the rest) are nested dictionaries
91 curNodeName = nodeDom.localName
93 if (nodeDom.hasChildNodes()):
95 for attribute in nodeDom.attributes.keys():
96 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
97 for child in nodeDom.childNodes[:-1]:
99 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
101 childdict = self.toGenDict(child, None, childdict, curNodeName)
103 child = nodeDom.childNodes[-1]
104 if (child.nodeValue):
105 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, child.nodeValue)
107 siblingdict = self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
109 siblingdict = self.toGenDict(child, siblingdict, childdict, curNodeName)
112 for attribute in nodeDom.attributes.keys():
113 childdict = self.appendToDictOrCreate(childdict, attribute, nodeDom.getAttribute(attribute))
115 self.appendToDictOrCreate(siblingdict, curNodeName, childdict)
117 if (parentdict is not None):
118 parentdict = self.appendToDictOrCreate(parentdict, parent, siblingdict)
def toDict(self, nodeDom=None):
    """
    convert this rspec to a dict and return it.

    nodeDom -- DOM node to convert; defaults to self.rootNode.

    Returns {nodeName: entry}.  entry starts as the node's attribute dict
    (from self._attributeDict) and gains, for every child that
    self._getName gives a name, a list stored under that name.  The list
    collects the text data of children whose first child is a text node,
    or otherwise the values of the recursively converted child dict.
    A node whose nodeName is empty or starts with "#" yields {}.
    """
    node = {}
    if nodeDom is None:
        nodeDom = self.rootNode

    elementName = nodeDom.nodeName
    if elementName and not elementName.startswith("#"):
        # attributes have tags and values.  get {tag: value}, else {type: value}
        entry = node[elementName] = self._attributeDict(nodeDom)

        # resolve the child nodes.
        for child in nodeDom.childNodes:
            childName = self._getName(child)

            # skip children that have no usable name
            if not childName:
                continue

            # initialize the possible array of children
            values = entry.setdefault(childName, [])

            if isinstance(child, minidom.Text):
                # add if data is not empty.  This must read the child's own
                # data: the loop variable `nextchild` used here before is
                # unbound (or stale from an earlier child) at this point.
                # NOTE(review): _getName appears to return no name for
                # "#text" nodes, so this branch may be unreachable - confirm.
                if child.data.strip():
                    values.append(child.data)
            elif child.hasChildNodes() and isinstance(child.childNodes[0], minidom.Text):
                # element holding text: keep the data of each of its children
                # (assumes all of them are text nodes, as before)
                for nextchild in child.childNodes:
                    values.append(nextchild.data)
            else:
                # nested element: recurse and keep the converted entry
                values.extend(self.toDict(child).values())

    return node
165 convert this rspec to an xml string and return it.
167 return self.rootNode.toxml()
170 def toprettyxml(self):
172 print this rspec in xml in a pretty format.
174 return self.rootNode.toprettyxml()
177 def __removeWhitespaceNodes(self, parent):
178 for child in list(parent.childNodes):
179 if child.nodeType == minidom.Node.TEXT_NODE and child.data.strip() == '':
180 parent.removeChild(child)
182 self.__removeWhitespaceNodes(child)
def parseFile(self, filename):
    """
    read a local xml file and store it as a dom object.

    filename -- handed unchanged to minidom.parse().

    Whitespace-only text nodes are stripped from the parsed document and
    its root element is stored in self.rootNode.
    """
    dom = minidom.parse(filename)
    self.__removeWhitespaceNodes(dom)
    # documentElement is always the root element; childNodes[0] would be a
    # comment, doctype or processing instruction when one precedes the root.
    self.rootNode = dom.documentElement
def parseString(self, xml):
    """
    read an xml string and store it as a dom object.

    xml -- the document, as a byte string or a unicode string.

    Whitespace-only text nodes are stripped from the parsed document and
    its root element is stored in self.rootNode.
    """
    # Former debug dumps of the whole document to stderr are reduced to a
    # single debug-level log line.
    info_logger.debug("rspec.parseString: parsing %d characters of xml" % len(xml))
    # Only unicode objects need encoding.  Calling encode() on a byte string
    # makes Python 2 decode it as ASCII first, which fails on non-ASCII data.
    if isinstance(xml, unicode):
        xml = xml.encode("utf-8")
    dom = minidom.parseString(xml)
    self.__removeWhitespaceNodes(dom)
    # documentElement is always the root element; childNodes[0] would be a
    # comment, doctype or processing instruction when one precedes the root.
    self.rootNode = dom.documentElement
206 def _httpGetXSD(self, xsdURI):
207 # split the URI into relevant parts
208 host = xsdURI.split("/")[2]
209 if xsdURI.startswith("https"):
210 conn = httplib.HTTPSConnection(host,
211 httplib.HTTPSConnection.default_port)
212 elif xsdURI.startswith("http"):
213 conn = httplib.HTTPConnection(host,
214 httplib.HTTPConnection.default_port)
215 conn.request("GET", xsdURI)
216 # If we can't download the schema, raise an exception
217 r1 = conn.getresponse()
220 return r1.read().replace('\n', '').replace('\t', '').strip()
223 def _parseXSD(self, xsdURI):
225 Download XSD from URL, or if file, read local xsd file and set
228 Since the schema definition is a global namespace shared by and
229 agreed upon by others, this should probably be a URL. Check
230 for URL, download xsd, parse, or if local file, use that.
233 if xsdURI.startswith("http"):
235 schemaDom = minidom.parseString(self._httpGetXSD(xsdURI))
237 # logging.debug("%s: web file not found" % xsdURI)
238 # logging.debug("Using local file %s" % self.xsd")
239 info_logger.log_exc("rspec.parseXSD: can't find %s on the web. Continuing." % xsdURI)
241 if os.path.exists(xsdURI):
242 # logging.debug("using local copy.")
243 info_logger.debug("rspec.parseXSD: Using local %s" % xsdURI)
244 schemaDom = minidom.parse(xsdURI)
246 raise Exception("rspec.parseXSD: can't find xsd locally")
247 self.schemaDict = self.toDict(schemaDom.childNodes[0])
250 def dict2dom(self, rdict, include_doc = False):
252 convert a dict object into a dom object.
255 def elementNode(tagname, rd):
256 element = minidom.Element(tagname)
257 for key in rd.keys():
258 if isinstance(rd[key], StringTypes) or isinstance(rd[key], int):
259 element.setAttribute(key, unicode(rd[key]))
260 elif isinstance(rd[key], dict):
261 child = elementNode(key, rd[key])
262 element.appendChild(child)
263 elif isinstance(rd[key], list):
265 if isinstance(item, dict):
266 child = elementNode(key, item)
267 element.appendChild(child)
268 elif isinstance(item, StringTypes) or isinstance(item, int):
269 child = minidom.Element(key)
270 text = minidom.Text()
272 child.appendChild(text)
273 element.appendChild(child)
276 # Minidom does not allow documents to have more than one
277 # child, but elements may have many children. Because of
278 # this, the document's root node will be the first key/value
279 # pair in the dictionary.
280 node = elementNode(rdict.keys()[0], rdict.values()[0])
282 rootNode = minidom.Document()
283 rootNode.appendChild(node)
289 def parseDict(self, rdict, include_doc = True):
291 Convert a dictionary into a dom object and store it.
293 self.rootNode = self.dict2dom(rdict, include_doc).childNodes[0]
296 def getDictsByTagName(self, tagname, dom = None):
298 Search the dom for all elements with the specified tagname
299 and return them as a list of dicts
304 doms = dom.getElementsByTagName(tagname)
305 dictlist = [self.toDict(d) for d in doms]
306 for item in dictlist:
307 for value in item.values():
311 def getDictByTagNameValue(self, tagname, value, dom = None):
313 Search the dom for the first element with the specified tagname
314 and value and return it as a dict.
319 dicts = self.getDictsByTagName(tagname, dom)
322 if rdict.has_key('name') and rdict['name'] in [value]:
def filter(self, tagname, attribute, blacklist = [], whitelist = [], dom = None):
    """
    Removes all elements where:
    1. tagname matches the element tag
    2. attribute matches the element attribute
    3. the attribute value is in blacklist, or whitelist is non-empty
       and the attribute value is not in it

    tagname   -- local name of the elements to examine.
    attribute -- name of the attribute whose value is tested.
    blacklist -- values that cause removal (only read, never modified).
    whitelist -- when non-empty, the only values that are kept (only read).
    dom       -- node to start from; defaults to self.rootNode.

    A rejected element is detached from its parent together with its
    whole subtree; the remaining tree is filtered recursively.
    """
    if dom is None:
        dom = self.rootNode

    # Only elements carry attributes; every other node type reports None.
    if (dom.localName == tagname and dom.attributes is not None
            and dom.hasAttribute(attribute)):
        value = dom.getAttribute(attribute)
        rejected = ((whitelist and value not in whitelist) or
                    (blacklist and value in blacklist))
        if rejected:
            # Detach exactly once.  Testing both lists separately used to
            # call removeChild() a second time, after parentNode was
            # already cleared by the first removal.
            dom.parentNode.removeChild(dom)
            # The subtree left the document with its root; nothing more
            # to filter below it.
            return

    # Walk a snapshot: children that remove themselves shrink the live
    # childNodes list and would make the loop skip their next sibling.
    for child in list(dom.childNodes):
        self.filter(tagname, attribute, blacklist, whitelist, child)
351 def merge(self, rspecs, tagname, dom=None):
353 Merge this rspec with the requested rspec based on the specified
354 starting tag name. The start tag (and all of its children) will be merged
363 if dom.localName in [tagname] and dom.attributes.has_key(attribute):
364 if whitelist and dom.attributes.get(attribute).value not in whitelist:
365 dom.parentNode.removeChild(dom)
366 if blacklist and dom.attributes.get(attribute).value in blacklist:
367 dom.parentNode.removeChild(dom)
369 if dom.hasChildNodes():
370 for child in dom.childNodes:
371 self.filter(tagname, attribute, blacklist, whitelist, child)
373 def validateDicts(self):
383 def pprint(self, r = None, depth = 0):
385 Pretty print the dict
388 if r == None: r = self.dict
390 for tab in range(0,depth): line += " "
391 # check if it's nested
394 print line + "%s:" % i
395 self.pprint(r[i], depth + 1)
396 elif type(r) in (tuple, list):
397 for j in r: self.pprint(j, depth + 1)
398 # not nested so just print.
400 print line + "%s" % r
404 class RecordSpec(RSpec):
407 def parseDict(self, rdict, include_doc = False):
409 Convert a dictionary into a dom object and store it.
411 self.rootNode = self.dict2dom(rdict, include_doc)
413 def dict2dom(self, rdict, include_doc = False):
415 if not len(rdict.keys()) == 1:
416 record_dict = {self.root_tag : rdict}
417 return RSpec.dict2dom(self, record_dict, include_doc)