# Prefer the C-accelerated module name used on Python 2; fall back to the
# plain module on interpreters where ``cElementTree`` no longer exists.
try:
    from xml.etree import cElementTree as ET
except ImportError:
    from xml.etree import ElementTree as ET

# ``basestring`` only exists on Python 2; on Python 3 ``str`` is the text type.
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)


def write_tree(tree, output_filename):
    """Serialize the element ``tree`` to ``output_filename`` as UTF-8 XML.

    ``tree`` is wrapped in an ``ET.ElementTree`` before being written.  The
    file is opened in binary mode because the serializer emits encoded bytes,
    and a ``with`` block guarantees the handle is closed even if writing fails.
    """
    doc = ET.ElementTree(tree)
    with open(output_filename, "wb") as out:
        doc.write(out, "UTF-8")


class _E(object):
    """Element factory: ``E(tag, *children, **attrib)`` or ``E.tag(...)``.

    Each positional argument is interpreted by type:

    * ``dict``    -- merged into the new element's attributes;
    * string      -- appended to the element's text, or, once at least one
                     child element has been added, to the tail of the most
                     recently added child;
    * element     -- appended as a child.

    Anything else raises ``TypeError``.
    """

    def __call__(self, tag, *children, **attrib):
        elem = ET.Element(tag, attrib)
        for item in children:
            if isinstance(item, dict):
                elem.attrib.update(item)
            elif isinstance(item, _STRING_TYPES):
                if len(elem):
                    # Text that follows a child element belongs to that
                    # child's tail, not to the parent's text.
                    last_child = elem[-1]
                    last_child.tail = (last_child.tail or "") + item
                else:
                    elem.text = (elem.text or "") + item
            elif ET.iselement(item):
                elem.append(item)
            else:
                raise TypeError("bad argument: %r" % item)
        return elem

    def __getattr__(self, tag):
        # Any attribute not found normally becomes a builder for an element
        # named after that attribute, e.g. ``E.item("text")``.
        return lambda *args, **kwargs: self(tag, *args, **kwargs)


# create factory object
E = _E()

# Name of the attribute that ``element_handler`` stores on decorated methods
# and that the ``iterparse_handler`` metaclass looks for.
HANDLER_ATTRIBUTE_NAME = "_handled"


def element_handler(tag, event="end"):
    """Return a decorator marking a method as the handler for ``(event, tag)``.

    ``event`` must be ``"start"`` or ``"end"``.  The decorated function is
    returned unchanged apart from gaining the ``HANDLER_ATTRIBUTE_NAME``
    attribute holding the ``(event, tag)`` pair.
    """
    assert event in ("start", "end"), "event must be 'start' or 'end'"

    def _inner_element_handler(method):
        setattr(method, HANDLER_ATTRIBUTE_NAME, (event, tag))
        return method

    return _inner_element_handler


class iterparse_handler(type):
    """Metaclass that builds a ``(event, tag) -> function`` dispatch table.

    The table is stored on every created class as ``__x_handlers__``.  Only
    callables defined directly in the class body are collected; each class
    gets a fresh table, so entries from base classes are not copied into it.
    """

    def __new__(meta, classname, bases, classDict):
        classDict["__x_handlers__"] = handlers = {}
        # ``values()`` works on both Python 2 and 3 (``itervalues`` is
        # Python 2 only).  Only ``handlers`` is mutated inside the loop.
        for attr in classDict.values():
            if callable(attr) and hasattr(attr, HANDLER_ATTRIBUTE_NAME):
                handlers[getattr(attr, HANDLER_ATTRIBUTE_NAME)] = attr
        return type.__new__(meta, classname, bases, classDict)


# Creating the base class by calling the metaclass directly applies it on
# both Python 2 and Python 3; a ``__metaclass__`` class attribute is ignored
# by Python 3, which would leave ``__x_handlers__`` undefined.
_IterParseHandlerBase = iterparse_handler("_IterParseHandlerBase", (object,), {})


class IterParseHandler(_IterParseHandlerBase):
    """Base class for event-driven XML processing on top of ``ET.iterparse``.

    Subclasses decorate methods with ``element_handler(tag, event)``; during
    ``parse`` each matching ``(event, tag)`` pair invokes the corresponding
    method with the element as its only argument.
    """

    def parse(self, path):
        """Parse ``path`` and dispatch start/end events to registered handlers.

        ``path`` is passed straight to ``ET.iterparse``.  The first event
        (the root element's "start") is consumed and given to
        ``_handle_root`` instead of the handler table; every later event is
        looked up in ``__x_handlers__`` and silently skipped when no handler
        is registered for it.
        """
        context = iter(ET.iterparse(path, events=("start", "end")))
        _, root = next(context)
        self._handle_root(root)
        for event, elem in context:
            handler = self.__x_handlers__.get((event, elem.tag))
            if handler is None:
                continue
            # The table holds plain functions, so ``self`` is passed explicitly.
            handler(self, elem)

    def _handle_root(self, elem):
        """Hook called once with the root element; does nothing by default."""
        pass