# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (C) 2014-2023 GEM Foundation
#
# OpenQuake is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OpenQuake is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.

"""
This module defines a Node class, together with a few conversion
functions which are able to convert NRML files into hierarchical
objects (DOM). That makes it easier to read and write XML from Python
and vice versa. Such features are used in the command-line conversion
tools. The Node class is kept intentionally similar to an
Element class, however it overcomes the limitation of ElementTree: in
particular a node can manage a lazy iterable of subnodes, whereas
ElementTree wants to keep everything in memory. Moreover the Node
class provides a convenient dot notation to access subnodes.

The Node class is instantiated with four arguments:

1. the node tag (a mandatory string)
2. the node attributes (a dictionary)
3. the node value (a string or None)
4. the subnodes (an iterable over nodes)

If a node has subnodes, its value should be None.

For instance, here is an example of instantiating a root node
with two subnodes a and b:

>>> from openquake.baselib.node import Node
>>> a = Node('a', {}, 'A1')
>>> b = Node('b', {'attrb': 'B'}, 'B1')
>>> root = Node('root', nodes=[a, b])
>>> root
<root {} None ...>

Node objects can be converted into nicely indented strings:

>>> print(root.to_str())
root
 a 'A1'
 b{attrb='B'} 'B1'
<BLANKLINE>

The subnodes can be retrieved with the dot notation:

>>> root.a
<a {} A1 >

The value of a node can be extracted with the `~` operator:

>>> ~root.a
'A1'

If there are multiple subnodes with the same name

>>> root.append(Node('a', {}, 'A2'))  # add another 'a' node

the dot notation will retrieve the first node.

It is possible to retrieve the other nodes from the ordinal
index:

>>> root[0], root[1], root[2]
(<a {} A1 >, <b {'attrb': 'B'} B1 >, <a {} A2 >)

The list of all subnodes with a given name can be retrieved
as follows:

>>> list(root.getnodes('a'))
[<a {} A1 >, <a {} A2 >]

It is also possible to delete a node given its index:

>>> del root[2]

A node is an iterable object yielding its subnodes:

>>> list(root)
[<a {} A1 >, <b {'attrb': 'B'} B1 >]

The attributes of a node can be retrieved with the square bracket notation:

>>> root.b['attrb']
'B'

It is possible to add and remove attributes freely:

>>> root.b['attr'] = 'new attr'
>>> del root.b['attr']

Node objects can be easily converted into ElementTree objects:

>>> node_to_elem(root)  #doctest: +ELLIPSIS
<Element 'root' at ...>

Then it is trivial to generate the XML representation of a node:

>>> from xml.etree import ElementTree
>>> print(ElementTree.tostring(node_to_elem(root)).decode('utf-8'))
<root><a>A1</a><b attrb="B">B1</b></root>

Generating XML files larger than the available memory requires some
care. The trick is to use a node generator, such that it is not
necessary to keep the entire tree in memory. Here is an example:

>>> def gen_many_nodes(N):
...     for i in range(N):
...         yield Node('a', {}, 'Text for node %d' % i)

>>> lazytree = Node('lazytree', {}, nodes=gen_many_nodes(10))

The lazytree object defined here consumes no memory, because the
nodes are not created at instantiation time. They are created as
soon as you start iterating on the lazytree. In particular
list(lazytree) will generate all of them. If your goal is to
store the tree on the filesystem in XML format you should use
a writing routine converting a subnode at the time, without
requiring the full list of them. The routines provided by
ElementTree are no good, however baselib.writers
provides a StreamingXMLWriter just for that purpose.

Lazy trees should *not* be used unless it is absolutely necessary in
order to save memory; the problem is that if you use a lazy tree the
slice notation will not work (the underlying generator will not accept
it); moreover it will not be possible to iterate twice on the
subnodes, since the generator will be exhausted. Notice that even
accessing a subnode with the dot notation will advance the
generator. Finally, nodes containing lazy nodes will not be pickleable.
"""
import io
import sys
import copy
import types
import warnings
import itertools
import pprint as pp
import configparser
from contextlib import contextmanager
from openquake.baselib.python3compat import raise_, decode, encode
from xml.etree import ElementTree
from xml.sax.saxutils import escape, quoteattr
from xml.parsers.expat import ParserCreate, ExpatError, ErrorString
import numpy


@contextmanager
def floatformat(fmt_string):
    """
    Context manager to change the default format string for the
    function :func:`openquake.baselib.writers.write_csv`.

    It works by temporarily replacing the first default argument of
    :func:`scientificformat`; the previous defaults are put back when
    the block is left, even if an exception was raised.

    :param fmt_string: the format to use; for instance '%13.9E'
    """
    saved_defaults = scientificformat.__defaults__
    # only the first default (fmt) is overridden, the separators are kept
    scientificformat.__defaults__ = (fmt_string, *saved_defaults[1:])
    try:
        yield
    finally:
        scientificformat.__defaults__ = saved_defaults


# characters that can appear in the formatted representation of a zero
zeroset = {'E', '-', '+', '.', '0'}


def scientificformat(value, fmt='%13.9E', sep=' ', sep2=':'):
    """
    Convert a float or an array into a string by using the scientific
    notation and a fixed precision (by default 10 decimal digits).

    :param value: the value to convert into a string
    :param fmt: the formatting string to use for float values
    :param sep: separator to use for vector-like values
    :param sep2: second separator to use for matrix-like values
    :returns: the string representation of the value

    For instance:

    >>> scientificformat(-0E0)
    '0.000000000E+00'
    >>> scientificformat(-0.004)
    '-4.000000000E-03'
    >>> scientificformat([0.004])
    '4.000000000E-03'
    >>> scientificformat([0.01, 0.02], '%10.6E')
    '1.000000E-02 2.000000E-02'
    >>> scientificformat([[0.1, 0.2], [0.3, 0.4]], '%4.1E')
    '1.0E-01:2.0E-01 3.0E-01:4.0E-01'
    """
    if isinstance(value, numpy.bool_):
        return '1' if value else '0'
    if isinstance(value, bytes):
        return value.decode('utf8')
    if isinstance(value, str):
        return value
    if hasattr(value, '__len__'):
        # sized container: format each item, using the second separator
        # for the nested level
        return sep.join(scientificformat(item, fmt, sep2) for item in value)
    if isinstance(value, (float, numpy.float64, numpy.float32)):
        formatted = fmt % value
        if set(formatted) <= zeroset:
            # '-0.0000000E+00' is converted into '0.0000000E+00'
            formatted = formatted.replace('-', '')
        return formatted
    return str(value)


def tostring(node, indent=4, nsmap=None):
    """
    Convert a node into an XML string by using the StreamingXMLWriter.
    This is useful for testing purposes.

    :param node: a node object (typically an ElementTree object)
    :param indent: the indentation to use in the XML (default 4 spaces)
    :param nsmap: optional namespace map passed to the writer
    :returns: the bytes written by the writer
    """
    buf = io.BytesIO()
    StreamingXMLWriter(buf, indent, nsmap=nsmap).serialize(node)
    return buf.getvalue()


class StreamingXMLWriter(object):
    """
    A binary stream XML writer. The typical usage is something like this::

        with StreamingXMLWriter(output_file) as writer:
            writer.start_tag('root')
            for node in nodegenerator():
                writer.serialize(node)
            writer.end_tag('root')
    """
    def __init__(self, bytestream, indent=4, encoding='utf-8', nsmap=None):
        """
        :param bytestream: the stream or file where to write the XML
        :param int indent: the indentation to use in the XML (default 4 spaces)
        :param str encoding: the encoding used to convert text into bytes
        :param dict nsmap: map namespace -> alias used to shorten the tags
        :raises TypeError: if a text stream is passed instead of a byte stream
        """
        # guard against a common error, one must use io.BytesIO
        if isinstance(bytestream, (io.StringIO, io.TextIOWrapper)):
            raise TypeError('%r is not a byte stream' % bytestream)
        self.stream = bytestream
        self.indent = indent
        self.encoding = encoding
        self.indentlevel = 0
        self.nsmap = nsmap

    def shorten(self, tag):
        """
        Get the short representation of a fully qualified tag

        :param str tag: a (fully qualified or not) XML tag
        """
        if tag.startswith('{'):
            # split on the last brace only: the namespace may contain '}'
            ns, _tag = tag.rsplit('}', 1)
            tag = self.nsmap.get(ns[1:], '') + _tag
        return tag

    def _write(self, text):
        """Write text by respecting the current indentlevel"""
        spaces = ' ' * (self.indent * self.indentlevel)
        line = spaces + text.strip() + '\n'
        # the stream expects bytes
        self.stream.write(line.encode(self.encoding, 'xmlcharrefreplace'))

    def emptyElement(self, name, attrs):
        """Add an empty element (may have attributes)"""
        attr = ' '.join('%s=%s' % (n, quoteattr(scientificformat(v)))
                        for n, v in sorted(attrs.items()))
        # the space separates the tag name from its attributes; without
        # it the output would not be well-formed XML
        self._write('<%s %s/>' % (name, attr))

    def start_tag(self, name, attrs=None):
        """Open an XML tag"""
        if not attrs:
            self._write('<%s>' % name)
        else:
            # the tag name, each attribute and the closing bracket are
            # written on separate lines
            self._write('<' + name)
            for attname, value in sorted(attrs.items()):
                self._write(
                    ' %s=%s' % (attname, quoteattr(scientificformat(value))))
            self._write('>')
        self.indentlevel += 1

    def end_tag(self, name):
        """Close an XML tag"""
        self.indentlevel -= 1
        self._write('</%s>' % name)

    def serialize(self, node):
        """Serialize a node object (typically an ElementTree object)"""
        if isinstance(node.tag, types.FunctionType):
            # this looks like a bug of ElementTree: comments are stored as
            # functions!?? see https://hg.python.org/sandbox/python2.7/file/tip/Lib/xml/etree/ElementTree.py#l458
            return
        if self.nsmap is not None:
            tag = self.shorten(node.tag)
        else:
            tag = node.tag
        with warnings.catch_warnings():  # unwanted ElementTree warning
            warnings.simplefilter('ignore')
            leafnode = not node
        # NB: we cannot use len(node) to identify leafs since nodes containing
        # an iterator have no length. They are always True, even if empty :-(
        if leafnode and node.text is None:
            self.emptyElement(tag, node.attrib)
            return
        self.start_tag(tag, node.attrib)
        if node.text is not None:
            if striptag(node.tag) == 'posList':
                # NOTE: by convention, posList must be a flat list of
                # space-separated coordinates, so we need to flatten any
                # nested lists or tuples, producing a single list of values
                obj = node.text
                # `obj and` protects obj[0] against an empty sequence
                while (isinstance(obj, (list, tuple)) and obj
                       and isinstance(obj[0], (list, tuple))):
                    obj = list(itertools.chain(*obj))
                txt = escape(scientificformat(obj).strip())
            else:
                txt = escape(scientificformat(node.text).strip())
            if txt:
                self._write(txt)
        for subnode in node:
            self.serialize(subnode)
        self.end_tag(tag)

    def __enter__(self):
        """Write the XML declaration"""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self.encoding)
        return self

    def __exit__(self, etype, exc, tb):
        """Close the XML document"""
        pass


class SourceLineParser(ElementTree.XMLParser):
    """
    A custom parser managing line numbers: works for Python <= 3.3
    """
    # NOTE(review): `_start_list` is a private hook of the parent parser;
    # presumably it is not invoked by newer ElementTree implementations,
    # in which case no `lineno` is attached -- TODO confirm
    def _start_list(self, tag, attrib_in):
        element = super()._start_list(tag, attrib_in)
        # there is also CurrentColumnNumber available, if wanted
        element.lineno = self.parser.CurrentLineNumber
        return element


def fromstring(text):
    """
    Parse an XML string and return a tree

    :param text: the XML to parse
    :returns: what ElementTree.fromstring returns with a SourceLineParser
    """
    parser = SourceLineParser()
    return ElementTree.fromstring(text, parser)


def parse(source, remove_comments=True, **kw):
    """
    Thin wrapper around ElementTree.parse

    :param source: forwarded to ElementTree.parse
    :param remove_comments: accepted but not used by this function
    :param kw: extra keyword arguments forwarded to ElementTree.parse
    """
    parser = SourceLineParser()
    return ElementTree.parse(source, parser, **kw)


def iterparse(source, events=('end',), remove_comments=True, **kw):
    """
    Thin wrapper around ElementTree.iterparse

    :param source: forwarded to ElementTree.iterparse
    :param events: the events to report (by default only 'end')
    :param remove_comments: accepted but not used by this function
    :param kw: extra keyword arguments forwarded to ElementTree.iterparse
    """
    parser = SourceLineParser()
    return ElementTree.iterparse(source, events, parser, **kw)


# ###################### utilities for the Node class ####################### #def_displayattrs(attrib,expandattrs):""" Helper function to display the attributes of a Node object in lexicographic order. :param attrib: dictionary with the attributes :param expandattrs: if True also displays the value of the attributes """ifnotattrib:return''ifexpandattrs:alist=['%s=%r'%itemforiteminsorted(attrib.items())]else:alist=list(attrib)return'{%s}'%', '.join(alist)def_display(node,indent,expandattrs,expandvals,output,striptags=True,shortentags=False,nsmap=None):"""Core function to display a Node object"""attrs=_displayattrs(node.attrib,expandattrs)ifnode.textisNoneornotexpandvals:val=''elifisinstance(node.text,str):val=' %s'%repr(node.text.strip())else:val=' %s'%repr(node.text)# node.text can be a tupletag=node.tagifshortentagsandnsmap:iftag.startswith('{'):ns,_tag=tag.rsplit('}')tag='{'+nsmap.get(ns[1:],'')+'}'+_tagelifstriptags:tag=striptag(node.tag)output.write(encode(indent+tag+attrs+val+'\n'))forsub_nodeinnode:_display(sub_node,indent+' ',expandattrs,expandvals,output,striptags,shortentags,nsmap)
def node_display(root, expandattrs=False, expandvals=False, output=sys.stdout,
                 striptags=True, shortentags=False, nsmap=None):
    """
    Write an indented representation of the Node object on the output;
    this is intended for testing/debugging purposes.

    :param root: a Node object
    :param bool expandattrs: if True, the values of the attributes are
        also printed, not only the names
    :param bool expandvals: if True, the values of the tags are also printed,
        not only the names.
    :param output: stream where to write the string representation of the node
    :param bool striptags: do not display fully qualified tag names
    :param bool shortentags: display a shorter representation of the
        namespace (overriding the striptags parameter)
    :param dict nsmap: map of namespaces (keys are full names, values are
        the corresponding aliases)
    """
    # NOTE(review): each line goes through `encode` before being written;
    # presumably that yields bytes, so a binary stream is expected and the
    # sys.stdout default may not be usable as is -- TODO confirm
    top_indent = ''
    _display(root, top_indent, expandattrs, expandvals, output,
             striptags, shortentags, nsmap)


def striptag(tag):
    """
    Get the short representation of a fully qualified tag

    :param str tag: a (fully qualified or not) XML tag
    :returns: the part of the tag after the namespace, i.e. after the
        last closing brace; tags without a namespace are returned unchanged
    """
    if tag.startswith('{'):
        # split once, from the right: the namespace itself may contain '}'
        # and a malformed tag without closing brace is returned as it is
        return tag.rsplit('}', 1)[-1]
    return tag


class Node(object):
    """
    A class to make it easy to edit hierarchical structures with attributes,
    such as XML files. Node objects must be pickleable and must consume as
    little memory as possible. Moreover they must be easily converted from
    and to ElementTree objects. The advantage over ElementTree objects
    is that subnodes can be lazily generated and that they can be accessed
    with the dot notation.
    """
    __slots__ = ('tag', 'attrib', 'text', 'nodes', 'lineno')

    def __init__(self, fulltag, attrib=None, text=None,
                 nodes=None, lineno=None):
        """
        :param str fulltag: the Node name
        :param dict attrib: the Node attributes
        :param str text: the Node text (default None)
        :param nodes: an iterable of subnodes (default empty list)
        :param lineno: line number where the tag was read in the source xml
        :raises ValueError: if both subnodes and a text are given
        """
        self.tag = fulltag
        self.attrib = {} if attrib is None else attrib
        self.text = text
        self.nodes = [] if nodes is None else nodes
        self.lineno = lineno
        if self.nodes and self.text is not None:
            # the 1-tuple keeps the formatting working for tuple values
            raise ValueError(
                'A branch node cannot have a value, got %r' % (self.text,))

    def __getattr__(self, name):
        # do the magic only for public names; a slot which is not set yet
        # must not be searched among the subnodes, since reading
        # self.nodes would call this method again without end
        if name.startswith('_') or name in Node.__slots__:
            raise AttributeError(name)
        for node in self.nodes:
            if striptag(node.tag) == name:
                return node
        raise AttributeError("No subnode named '%s' found in '%s'" %
                             (name, striptag(self.tag)))

    def getnodes(self, name):
        "Return the direct subnodes with name 'name'"
        for node in self.nodes:
            if striptag(node.tag) == name:
                yield node

    def append(self, node):
        "Append a new subnode"
        if not isinstance(node, self.__class__):
            raise TypeError('Expected Node instance, got %r' % (node,))
        self.nodes.append(node)

    def to_str(self, expandattrs=True, expandvals=True, striptags=True,
               shortentags=False):
        """
        Convert the node into a string, intended for testing/debugging purposes

        :param expandattrs:
          print the values of the attributes if True, else print only the names
        :param expandvals:
          print the values if True, else print only the tag names
        :param bool striptags: do not display fully qualified tag names
        :param bool shortentags: display a shorter representation of the
            namespace (overriding the striptags parameter)
        """
        # get_nsmap is optional: it is searched on the class, since an
        # instance lookup would go through __getattr__ and fail (or
        # return a subnode with that name); without it no map is used
        get_nsmap = getattr(type(self), 'get_nsmap', None)
        nsmap = None if get_nsmap is None else get_nsmap(self)
        out = io.BytesIO()
        node_display(self, expandattrs, expandvals, out, striptags,
                     shortentags, nsmap)
        return decode(out.getvalue())

    def __iter__(self):
        """Iterate over subnodes"""
        return iter(self.nodes)

    def __repr__(self):
        """A condensed representation for debugging purposes"""
        return '<%s %s %s %s>' % (striptag(self.tag), self.attrib, self.text,
                                  '' if not self.nodes else '...')

    def __getitem__(self, i):
        """
        Retrieve a subnode, if i is an integer, or an attribute, if i
        is a string.
        """
        if isinstance(i, str):
            return self.attrib[i]
        else:  # assume an integer or a slice
            return self.nodes[i]

    def get(self, attr, value=None):
        """
        Get the given `attr`; if missing, returns `value` or `None`.
        """
        return self.attrib.get(attr, value)

    def __setitem__(self, i, value):
        """
        Update a subnode, if i is an integer, or an attribute, if i
        is a string.
        """
        if isinstance(i, str):
            self.attrib[i] = value
        else:  # assume an integer or a slice
            self.nodes[i] = value

    def __delitem__(self, i):
        """
        Remove a subnode, if i is an integer, or an attribute, if i
        is a string.
        """
        if isinstance(i, str):
            del self.attrib[i]
        else:  # assume an integer or a slice
            del self.nodes[i]

    def __invert__(self):
        """
        Return the value of a leaf; raise a TypeError if the node is not a leaf
        """
        if self:
            raise TypeError('%s is a composite node, not a leaf' % self)
        return self.text

    def __len__(self):
        """Return the number of subnodes"""
        return len(self.nodes)

    def __bool__(self):
        """
        Return True if there are subnodes; it does not iter on the
        subnodes, so for lazy nodes it returns True even if the
        generator is empty.
        """
        return bool(self.nodes)

    def __deepcopy__(self, memo):
        new = object.__new__(self.__class__)
        new.tag = self.tag
        new.attrib = self.attrib.copy()
        new.text = copy.copy(self.text)
        new.nodes = [copy.deepcopy(n, memo) for n in self.nodes]
        new.lineno = self.lineno
        return new

    def __getstate__(self):
        return dict((slot, getattr(self, slot))
                    for slot in self.__class__.__slots__)

    def __setstate__(self, state):
        for slot in self.__class__.__slots__:
            setattr(self, slot, state[slot])

    def __eq__(self, other):
        # comparing with None is considered a programming error
        assert other is not None
        return all(getattr(self, slot) == getattr(other, slot)
                   for slot in self.__class__.__slots__)

    def __ne__(self, other):
        return not self.__eq__(other)


def to_literal(self):
    """
    Convert the node into a literal Python object

    :returns: a tuple (tag, attrib, text, subnodes), where the subnodes
        are a list of tuples converted in the same way
    """
    if self.nodes:
        children = [to_literal(child) for child in self.nodes]
    else:
        children = []
    return (self.tag, self.attrib, self.text, children)


def pprint(self, stream=None, indent=1, width=80, depth=None):
    """
    Pretty print the underlying literal Python object

    :param stream: where to print, forwarded to the standard pprint
    :param indent: forwarded to the standard pprint
    :param width: forwarded to the standard pprint
    :param depth: forwarded to the standard pprint
    """
    literal = to_literal(self)
    pp.pprint(literal, stream, indent, width, depth)


def node_from_dict(dic, nodefactory=Node):
    """
    Convert a (nested) dictionary into a Node object.

    :param dic: a dictionary with a single key (the tag); the value is
        either a scalar (the text of a leaf) or a dictionary where the
        key 'text' contains the text, the keys starting with an underscore
        contain the attributes and the other keys contain the subnodes
        (a dictionary for a single subnode, a list for many)
    :param nodefactory: the class used to build the node and its subnodes
    :returns: an instance of the nodefactory
    """
    [(tag, content)] = dic.items()
    if not isinstance(content, dict):  # leaf with a scalar value
        return nodefactory(tag, {}, content)
    content = content.copy()  # do not modify the input
    text = content.pop('text', None)
    attrib = {name[1:]: content.pop(name)
              for name in sorted(content) if name.startswith('_')}
    if not content:  # no subnodes
        return nodefactory(tag, attrib, text)
    # whatever is left describes the subnodes, possibly with different
    # tags; the factory is propagated so that the whole tree has one type
    nodes = []
    for key, value in content.items():
        values = value if isinstance(value, list) else [value]
        nodes.extend(node_from_dict({key: v}, nodefactory) for v in values)
    return nodefactory(tag, attrib, nodes=nodes)


def node_to_dict(node):
    """
    Convert a Node object into a (nested) dictionary
    with attributes tag, attrib, text, nodes.

    :param node: a Node-compatible object
    :returns: a dictionary with the stripped tag as the only key
    """
    # attributes are stored with a leading underscore; numpy.float64
    # values are converted into plain floats
    dic = {'_' + nam: (float(val) if isinstance(val, numpy.float64) else val)
           for nam, val in node.attrib.items()} if node.attrib else {}
    text = node.text
    is_blank = isinstance(text, str) and text.strip() == ''
    if text is not None and not is_blank:
        if node.attrib:
            dic['text'] = text
        else:
            # TODO: ugly, dic sometimes is a dic and sometimes a scalar??
            dic = text
    if node.nodes:
        dic.update(_group([node_to_dict(n) for n in node]))
    return {striptag(node.tag): dic}


def node_from_elem(elem, nodefactory=Node, lazy=()):
    """
    Convert (recursively) an ElementTree object into a Node object.

    :param elem: the element to convert
    :param nodefactory: the class used to build the nodes
    :param lazy: stripped tag names; the subnodes of the elements with
        such tags are stored as a generator and not as a list
    :returns: an instance of the nodefactory
    """
    lineno = getattr(elem, 'lineno', None)
    attrib = dict(elem.attrib)
    children = list(elem)
    if not children:  # leaf
        return nodefactory(elem.tag, attrib, elem.text, lineno=lineno)
    is_lazy = striptag(elem.tag) in lazy
    nodes = (node_from_elem(ch, nodefactory, lazy) for ch in children)
    if not is_lazy:
        nodes = list(nodes)
    return nodefactory(elem.tag, attrib, nodes=nodes, lineno=lineno)


# originally adapted from https://gist.github.com/651801 (effbot); the
# tree is now built directly instead of generating and executing code
def node_to_elem(root):
    """
    Convert (recursively) a Node object into an ElementTree object.

    :param root: a Node-compatible object, i.e. an iterable over its
        subnodes with attributes tag, attrib, text and nodes
    :returns: an ElementTree.Element with the same structure
    """
    def build(node, parent=None):
        # the attributes are passed as a dictionary and not as keyword
        # arguments, so that any attribute name is accepted
        attrib = dict(node.attrib) if node.attrib else {}
        if parent is None:
            elem = ElementTree.Element(node.tag, attrib)
        else:
            elem = ElementTree.SubElement(parent, node.tag, attrib)
        if not node.nodes:  # only the leaves have a text
            elem.text = node.text
        for subnode in node:
            build(subnode, elem)
        return elem

    return build(root)


def read_nodes(fname, filter_elem, nodefactory=Node, remove_comments=True):
    """
    Convert an XML file into a lazy iterator over Node objects
    satisfying the given specification, i.e. a function element -> boolean.

    :param fname: file name of file object
    :param filter_elem: element specification
    :param nodefactory: the class used to build the nodes
    :param remove_comments: forwarded to :func:`iterparse`

    In case of errors, add the file name to the error message.
    """
    try:
        for _, el in iterparse(fname, remove_comments=remove_comments):
            if not filter_elem(el):
                continue
            yield node_from_elem(el, nodefactory)
            el.clear()  # save memory
    except Exception as exc:
        message = str(exc)
        if str(fname) not in message:
            message = '%s in %s' % (message, fname)
        raise_(type(exc), message, exc.__traceback__)


def node_from_xml(xmlfile, nodefactory=Node):
    """
    Convert a .xml file into a Node object.

    :param xmlfile: a file name or file object open for reading
    :param nodefactory: the class used to build the nodes
    :returns: the node corresponding to the root of the XML tree
    """
    tree = parse(xmlfile)
    return node_from_elem(tree.getroot(), nodefactory)


def node_to_xml(node, output, nsmap=None):
    """
    Convert a Node object into a pretty .xml file without keeping
    everything in memory. If you just want the string representation
    use tostring(node).

    :param node: a Node-compatible object (ElementTree nodes are fine)
    :param output: a binary output file
    :param nsmap: if given, shorten the tags with aliases

    Notice that when a namespace map is given the corresponding xmlns
    attributes are set on the node itself.
    """
    for ns, prefix in (nsmap or {}).items():
        # the last character of the alias is dropped in the attribute
        # name; an empty alias means the default namespace
        attname = 'xmlns:' + prefix[:-1] if prefix else 'xmlns'
        node[attname] = ns
    with StreamingXMLWriter(output, nsmap=nsmap) as writer:
        writer.serialize(node)


def node_from_ini(ini_file, nodefactory=Node, root_name='ini'):
    """
    Convert a .ini file into a Node object.

    :param ini_file: a filename or a file like object in read mode
    :param nodefactory: the class used to build the root and the sections
    :param root_name: the tag of the root node
    :returns: a root node with a subnode for each section; the parameters
        of the section are stored as attributes of the subnode
    """
    cfp = configparser.ConfigParser(interpolation=None)
    if isinstance(ini_file, str):
        # the file is opened here, so it must also be closed here
        with open(ini_file) as fileobj:
            cfp.read_file(fileobj)
    else:  # already a file object: the caller is responsible for it
        cfp.read_file(ini_file)
    root = nodefactory(root_name)
    for section in cfp.sections():
        params = dict(cfp.items(section))
        # the sections are built with the same factory as the root
        root.append(nodefactory(section, params))
    return root


def node_to_ini(node, output=sys.stdout):
    """
    Convert a Node object with the right structure into a .ini file.

    :params node: a Node object
    :params output: a file-like object opened in write mode

    Each subnode becomes a section named after its tag, followed by
    its attributes sorted by name.
    """
    write = output.write
    for section in node:
        write(u'\n[%s]\n' % section.tag)
        for key, val in sorted(section.attrib.items()):
            write(u'%s=%s\n' % (key, val))
    output.flush()


def node_copy(node, nodefactory=Node):
    """
    Make a deep copy of the node

    :param node: the node to copy
    :param nodefactory: the class used to build the copies
    :returns: a new node with copied attributes and recursively copied
        subnodes; tag and text are passed to the factory as they are
    """
    tag, attrib, text = node.tag, node.attrib.copy(), node.text
    subnodes = [node_copy(child, nodefactory) for child in node]
    return nodefactory(tag, attrib, text, subnodes)


@contextmanager
def context(fname, node):
    """
    Context manager managing exceptions and adding line number of the
    current node and name of the current file to the error message.

    :param fname: the current file being processed
    :param node: the current node being processed
    """
    try:
        yield node
    except Exception as exc:
        lineno = getattr(node, 'lineno', '?')
        message = 'node %s: %s, line %s of %s' % (
            striptag(node.tag), exc, lineno, fname)
        raise_(type(exc), message, exc.__traceback__)


class ValidatingXmlParser(object):
    """
    Validating XML Parser based on Expat. It has two methods
    `.parse_file` and `.parse_bytes` returning a validated :class:`Node`
    object.

    :param validators: a dictionary of validation functions
    :param stop: the tag where to stop the parsing (if any)
    """
    class Exit(Exception):
        """Raised when the parsing is stopped before the end on purpose"""

    def __init__(self, validators, stop=None):
        # the keys are tag names, attribute names or 'tag.attribute'
        # strings, as looked up by _set_text and _literalnode
        self.validators = validators
        self.stop = stop

    @contextmanager
    def _context(self):
        """
        Set up a fresh Expat parser and the parsing state; errors raised
        inside the block are re-raised with the file name attached, while
        the Exit exception is silenced.
        """
        # '}' as separator makes the qualified names look like 'ns}name',
        # which is what _start_element expects
        p = self.p = ParserCreate(namespace_separator='}')
        p.StartElementHandler = self._start_element
        p.EndElementHandler = self._end_element
        p.CharacterDataHandler = self._char_data
        self._ancestors = []
        self._root = None
        try:
            yield
        except ExpatError as err:
            e = ExpatError(ErrorString(err.code))
            e.lineno = err.lineno
            e.offset = err.offset
            e.filename = self.filename
            raise e from err
        except self.Exit:  # the parsing was stopped on purpose
            pass
        except Exception as exc:
            raise exc.__class__('%s: %s' % (exc, self.filename)) from exc

    def parse_bytes(self, bytestr, isfinal=True):
        """
        Parse a byte string. If the string is very large, split it in chunks
        and parse each chunk with isfinal=False, then parse an empty chunk
        with isfinal=True.
        """
        with self._context():
            self.filename = None
            self.p.Parse(bytestr, isfinal)
        return self._root

    def parse_file(self, file_or_fname):
        """
        Parse a file or a filename
        """
        with self._context():
            if hasattr(file_or_fname, 'read'):
                self.filename = getattr(
                    file_or_fname, 'name', file_or_fname.__class__.__name__)
                self.p.ParseFile(file_or_fname)
            else:
                self.filename = file_or_fname
                with open(file_or_fname, 'rb') as f:
                    self.p.ParseFile(f)
        return self._root

    def _start_element(self, longname, attrs):
        _ns, brace, name = longname.rpartition('}')
        if brace:
            # fix the tag with an opening brace
            tag = '{' + longname
        else:  # no namespace in the longname
            tag = longname
        self._ancestors.append(
            Node(tag, attrs, lineno=self.p.CurrentLineNumber))
        if self.stop and name == self.stop:
            # close all the open elements, innermost first, then
            # interrupt the parsing
            while self._ancestors:
                self._end_element(self._ancestors[-1].tag)
            raise self.Exit

    def _end_element(self, name):
        node = self._ancestors[-1]
        if isinstance(node.text, list):
            # the text arrives in chunks, see _char_data
            node.text = ''.join(node.text)
        with context(self.filename, node):
            self._root = self._literalnode(node)
        del self._ancestors[-1]
        if self._ancestors:
            self._ancestors[-1].append(self._root)

    def _char_data(self, data):
        if data:
            parent = self._ancestors[-1]
            if parent.text is None:
                parent.text = [data]
            else:
                parent.text.append(data)

    def _set_text(self, node, text, tag):
        if text is None:
            return
        try:
            val = self.validators[tag]
        except KeyError:  # no validator, the text is left as it is
            return
        try:
            node.text = val(decode(text.strip()))
        except Exception as exc:
            raise ValueError('Could not convert %s->%s: %s' %
                             (tag, val.__name__, exc)) from exc

    def _set_attrib(self, node, n, tn, v):
        val = self.validators[tn]
        try:
            node.attrib[n] = val(decode(v))
        except Exception as exc:
            # NOTE: the line number and the file name are added by the
            # 'context' contextmanager
            raise ValueError(
                'Could not convert %s->%s: %s' %
                (tn, val.__name__, exc)) from exc

    def _literalnode(self, node):
        tag = striptag(node.tag)
        # cast the text
        self._set_text(node, node.text, tag)
        # cast the attributes; a validator for 'tag.attribute' has the
        # precedence over a validator for the attribute name alone
        for n, v in node.attrib.items():
            tn = '%s.%s' % (tag, n)
            if tn in self.validators:
                self._set_attrib(node, n, tn, v)
            elif n in self.validators:
                self._set_attrib(node, n, n, v)
        return node