X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=sfa%2Futil%2Fxml.py;h=9c4a80f01866b0bc676368cd1af3a6d648c5ac43;hb=30d9951e075d93127c3909dcb41be09b420b3525;hp=2ededa9e26efb0087a3b8f80ef931c3cd9e7a19d;hpb=bb89376a5c419e735000fd7de6d9988bbb062445;p=sfa.git diff --git a/sfa/util/xml.py b/sfa/util/xml.py index 2ededa9e..9c4a80f0 100755 --- a/sfa/util/xml.py +++ b/sfa/util/xml.py @@ -1,9 +1,12 @@ #!/usr/bin/python -from types import StringTypes from lxml import etree from StringIO import StringIO from sfa.util.faults import InvalidXML +from sfa.rspecs.elements.element import Element +from sfa.util.py23 import StringType + +# helper functions to help build xpaths class XpathFilter: @staticmethod @@ -18,7 +21,8 @@ class XpathFilter: return xpath @staticmethod - def xpath(filter={}): + def xpath(filter=None): + if filter is None: filter={} xpath = "" if filter: filter_list = [] @@ -37,31 +41,80 @@ class XpathFilter: xpath = '[' + xpath + ']' return xpath -class XmlNode: - def __init__(self, node, namespaces): - self.node = node - self.text = node.text +# a wrapper class around lxml.etree._Element +# the reason why we need this one is because of the limitations +# we've found in xpath to address documents with multiple namespaces defined +# in a nutshell, we deal with xml documents that have +# a default namespace defined (xmlns="http://default.com/") and specific prefixes defined +# (xmlns:foo="http://foo.com") +# according to the documentation instead of writing +# element.xpath ( "//node/foo:subnode" ) +# we'd then need to write xpaths like +# element.xpath ( "//{http://default.com/}node/{http://foo.com}subnode" ) +# which is a real pain.. +# So just so we can keep some reasonable programming style we need to manage the +# namespace map that goes with the _Element (its internal .nsmap being unmutable) + +class XmlElement: + def __init__(self, element, namespaces): + self.element = element self.namespaces = namespaces - self.attrib = node.attrib - + # redefine as few methods as possible def xpath(self, xpath, namespaces=None): if not namespaces: namespaces = self.namespaces - elems = self.node.xpath(xpath, namespaces=namespaces) - return [XmlNode(elem, namespaces) for elem in elems] - + elems = self.element.xpath(xpath, namespaces=namespaces) + return [XmlElement(elem, namespaces) for elem in elems] + def add_element(self, tagname, **kwds): - element = etree.SubElement(self.node, tagname, **kwds) - return XmlNode(element, self.namespaces) + element = etree.SubElement(self.element, tagname, **kwds) + return XmlElement(element, self.namespaces) def append(self, elem): - if isinstance(elem, XmlNode): - self.node.append(elem.node) + if isinstance(elem, XmlElement): + self.element.append(elem.element) else: - self.node.append(elem) + self.element.append(elem) - def remove_elements(name): + def getparent(self): + return XmlElement(self.element.getparent(), self.namespaces) + + def get_instance(self, instance_class=None, fields=None): + """ + Returns an instance (dict) of this xml element. The instance + holds a reference to this xml element. + """ + if fields is None: fields=[] + if not instance_class: + instance_class = Element + if not fields and hasattr(instance_class, 'fields'): + fields = instance_class.fields + + if not fields: + instance = instance_class(self.attrib, self) + else: + instance = instance_class({}, self) + for field in fields: + if field in self.attrib: + instance[field] = self.attrib[field] + return instance + + def add_instance(self, name, instance, fields=None): + """ + Adds the specifed instance(s) as a child element of this xml + element. + """ + if fields is None: fields=[] + if not fields and hasattr(instance, 'keys'): + fields = instance.keys() + elem = self.add_element(name) + for field in fields: + if field in instance and instance[field]: + elem.set(field, unicode(instance[field])) + return elem + + def remove_elements(self, name): """ Removes all occurences of an element from the tree. Start at specified root_node if specified, otherwise start at tree's root. @@ -69,32 +122,41 @@ class XmlNode: if not element_name.startswith('//'): element_name = '//' + element_name - elements = self.node.xpath('%s ' % name, namespaces=self.namespaces) + elements = self.element.xpath('%s ' % name, namespaces=self.namespaces) for element in elements: parent = element.getparent() parent.remove(element) - def remove(element): - self.node.remove(element) + def delete(self): + parent = self.getparent() + parent.remove(self) + + def remove(self, element): + if isinstance(element, XmlElement): + self.element.remove(element.element) + else: + self.element.remove(element) - def set(self, key, value): - self.node.set(key, value) - def set_text(self, text): - self.node.text = text + self.element.text = text + # Element does not have unset ?!? def unset(self, key): - del self.node.attrib[key] + del self.element.attrib[key] - def iterchildren(self): - return self.node.iterchildren() - def toxml(self): - return etree.tostring(self.node, encoding='UTF-8', pretty_print=True) + return etree.tostring(self.element, encoding='UTF-8', pretty_print=True) def __str__(self): return self.toxml() + ### other method calls or attribute access like .text or .tag or .get + # are redirected on self.element + def __getattr__ (self, name): + if not hasattr(self.element, name): + raise AttributeError(name) + return getattr(self.element, name) + class XML: def __init__(self, xml=None, namespaces=None): @@ -104,7 +166,7 @@ class XML: self.schema = None if isinstance(xml, basestring): self.parse_xml(xml) - if isinstance(xml, XmlNode): + if isinstance(xml, XmlElement): self.root = xml self.namespaces = xml.namespaces elif isinstance(xml, etree._ElementTree) or isinstance(xml, etree._Element): @@ -121,7 +183,7 @@ class XML: # 'rspec' file doesnt exist. 'rspec' is proably an xml string try: tree = etree.parse(StringIO(xml), parser) - except Exception, e: + except Exception as e: raise InvalidXML(str(e)) root = tree.getroot() self.namespaces = dict(root.nsmap) @@ -136,12 +198,13 @@ class XML: else: self.namespaces['default'] = 'default' - self.root = XmlNode(root, self.namespaces) - # set schema + self.root = XmlElement(root, self.namespaces) + # set schema for key in self.root.attrib.keys(): if key.endswith('schemaLocation'): - # schema location should be at the end of the list - schema_parts = self.root.attrib[key].split(' ') + # schemaLocation should be at the end of the list. + # Use list comprehension to filter out empty strings + schema_parts = [x for x in self.root.attrib[key].split(' ') if x] self.schema = schema_parts[1] namespace, schema = schema_parts[0], schema_parts[1] break @@ -150,7 +213,7 @@ class XML: if element is None: if self.root is None: self.parse_xml('<%s/>' % root_tag_name) - element = self.root + element = self.root.element if 'text' in d: text = d.pop('text') @@ -166,9 +229,9 @@ class XML: self.parse_dict(val, key, child_element) elif isinstance(val, basestring): child_element = etree.SubElement(element, key).text = val - + elif isinstance(value, int): - d[key] = unicode(d[key]) + d[key] = unicode(d[key]) elif value is None: d.pop(key) @@ -177,7 +240,7 @@ class XML: d=d.copy() # looks like iteritems won't stand side-effects for k in d.keys(): - if not isinstance(d[k],StringTypes): + if not isinstance(d[k], StringType): del d[k] element.attrib.update(d) @@ -199,49 +262,41 @@ class XML: namespaces = self.namespaces return self.root.xpath(xpath, namespaces=namespaces) - def set(self, key, value, node=None): - if not node: - node = self.root - return node.set(key, value) + def set(self, key, value): + return self.root.set(key, value) - def remove_attribute(self, name, node=None): - if not node: - node = self.root - node.remove_attribute(name) - + def remove_attribute(self, name, element=None): + if not element: + element = self.root + element.remove_attribute(name) - def add_element(self, name, **kwds): + def add_element(self, *args, **kwds): """ Wrapper around etree.SubElement(). Adds an element to specified parent node. Adds element to root node is parent is not specified. """ - parent = self.root - xmlnode = parent.add_element(name, *kwds) - return xmlnode + return self.root.add_element(*args, **kwds) - def remove_elements(self, name, node = None): + def remove_elements(self, name, element = None): """ Removes all occurences of an element from the tree. Start at specified root_node if specified, otherwise start at tree's root. """ - if not node: - node = self.root + if not element: + element = self.root + + element.remove_elements(name) - node.remove_elements(name) + def add_instance(self, *args, **kwds): + return self.root.add_instance(*args, **kwds) - def attributes_list(self, elem): - # convert a list of attribute tags into list of tuples - # (tagnme, text_value) - opts = [] - if elem is not None: - for e in elem: - opts.append((e.tag, str(e.text).strip())) - return opts + def get_instance(self, *args, **kwds): + return self.root.get_instnace(*args, **kwds) def get_element_attributes(self, elem=None, depth=0): if elem == None: - elem = self.root_node + elem = self.root if not hasattr(elem, 'attrib'): # this is probably not an element node with attribute. could be just and an # attribute, return it @@ -260,6 +315,12 @@ class XML: attrs['child_nodes'] = list(elem) return attrs + def append(self, elem): + return self.root.append(elem) + + def iterchildren(self): + return self.root.iterchildren() + def merge(self, in_xml): pass @@ -267,7 +328,7 @@ class XML: return self.toxml() def toxml(self): - return etree.tostring(self.root.node, encoding='UTF-8', pretty_print=True) + return etree.tostring(self.root.element, encoding='UTF-8', pretty_print=True) # XXX smbaker, for record.load_from_string def todict(self, elem=None):