1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """
22 tupletree - Convert XML DOM objects to and from tuple trees.
23
24 DOM is the standard in-memory representation of XML documents, but it
25 is very cumbersome for some types of processing where XML encodes
26 object structures rather than text documents. Direct mapping to Python
27 classes may not be a good match either.
28
29 tupletrees may be created from an in-memory DOM using
30 dom_to_tupletree(), or from a string using xml_to_tupletree().
31
32 Since the Python XML libraries deal mostly with Unicode strings they
33 are also returned here. If plain Strings are passed in they will be
34 converted by xmldom.
35
36 Each node of the tuple tree is a Python 4-tuple, corresponding to an
37 XML Element (i.e. <tag>):
38
39 (NAME, ATTRS, CONTENTS, None)
40
41 The NAME is the name of the element.
42
43 The ATTRS are a name-value hash of element attributes.
44
45 The CONTENTS is a list of child elements (nested 4-tuples) and text strings.
46
47 The fourth element is reserved.
48 """
49
50 from types import StringTypes
51 import xml.dom.minidom
52
53 __all__ = ['dom_to_tupletree', 'xml_to_tupletree']
54
try:
    _STRING_TYPES = (basestring,)  # Python 2: matches both str and unicode
except NameError:
    _STRING_TYPES = (str,)  # Python 3: basestring no longer exists


def dom_to_tupletree(node):
    """Convert a DOM object to a pyRXP-style tuple tree.

    Each element is a 4-tuple of (NAME, ATTRS, CONTENTS, None), where
    NAME is the element's node name, ATTRS is a dict mapping attribute
    names to values, and CONTENTS is a list holding child elements (as
    nested 4-tuples) and the string values of text and CDATA nodes, in
    document order.

    Very nice for processing complex nested trees.

    ``node`` may be a Document node, in which case its root element is
    converted, or an Element node.

    Raises AssertionError if ``node`` is neither a document nor an
    element, or if a text node's value is not a string.  Raises
    RuntimeError if an element contains a child of any other node type
    (for example a comment or processing instruction).
    """
    if node.nodeType == node.DOCUMENT_NODE:
        # Use documentElement rather than firstChild: the first child of
        # a document may be a comment, processing instruction or DOCTYPE
        # that precedes the root element.
        return dom_to_tupletree(node.documentElement)
    assert node.nodeType == node.ELEMENT_NODE, \
        "expected an element node, got %r" % (node,)

    name = node.nodeName
    attrs = {}
    contents = []

    for child in node.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            contents.append(dom_to_tupletree(child))
        elif child.nodeType == child.TEXT_NODE:
            assert isinstance(child.nodeValue, _STRING_TYPES), \
                "text node %s is not a string" % repr(child)
            contents.append(child.nodeValue)
        elif child.nodeType == child.CDATA_SECTION_NODE:
            contents.append(child.nodeValue)
        else:
            raise RuntimeError("can't handle %s" % child)

    # Walk the attribute map by index so that only the generic DOM
    # NamedNodeMap interface (length / item) is required.
    for i in range(node.attributes.length):
        attr_node = node.attributes.item(i)
        attrs[attr_node.nodeName] = attr_node.nodeValue

    return (name, attrs, contents, None)
93
94
def xml_to_tupletree(xml_string):
    """Parse XML straight into tupletree.

    ``xml_string`` is parsed with xml.dom.minidom and the resulting
    document is converted with dom_to_tupletree(); the tuple tree of the
    document's root element is returned.
    """
    dom_xml = xml.dom.minidom.parseString(xml_string)
    try:
        return dom_to_tupletree(dom_xml)
    finally:
        # The tuple tree keeps no references to DOM nodes, so the
        # temporary DOM can be released as soon as conversion is done.
        dom_xml.unlink()
99