1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 """Extension to libxml2 for XMPP stream and stanza processing"""
20
21 __docformat__="restructuredtext en"
22
23 import sys
24 import libxml2
25 import threading
26 import re
27 import logging
28
29 from pyxmpp.exceptions import StreamParseError
30
31 common_doc = libxml2.newDoc("1.0")
32 common_root = common_doc.newChild(None,"root",None)
33 COMMON_NS = "http://pyxmpp.jajcus.net/xmlns/common"
34 common_ns = common_root.newNs(COMMON_NS, None)
35 common_root.setNs(common_ns)
36 common_doc.setRootElement(common_root)
37
38 logger = logging.getLogger("pyxmpp.xmlextra")
39
41 """Base class for stream handler."""
44
49
51 """Process stream end."""
52 doc=libxml2.xmlDoc(_doc)
53 self.stream_end(doc)
54
56 """Process complete stanza."""
57 doc=libxml2.xmlDoc(_doc)
58 node=libxml2.xmlNode(_node)
59 self.stanza(doc,node)
60
def stream_start(self, doc):
    """Called when the start tag of root element is encountered
    in the stream.

    This default implementation only reports the unhandled event: it
    writes a single line containing the `repr` of the serialized document
    to standard error.

    :Parameters:
        - `doc`: the document being parsed.
    :Types:
        - `doc`: `libxml2.xmlDoc`"""
    # repr() plus an explicit write emits the same text as the old
    # print-chevron/backtick form, but is valid syntax on Python 2 and 3.
    sys.stderr.write("Unhandled stream start: %s\n" % (repr(doc.serialize()),))
70
def stream_end(self, doc):
    """Called when the end tag of root element is encountered
    in the stream.

    This default implementation only reports the unhandled event: it
    writes a single line containing the `repr` of the serialized document
    to standard error.

    :Parameters:
        - `doc`: the document being parsed.
    :Types:
        - `doc`: `libxml2.xmlDoc`"""
    # repr() plus an explicit write emits the same text as the old
    # print-chevron/backtick form, but is valid syntax on Python 2 and 3.
    sys.stderr.write("Unhandled stream end %s\n" % (repr(doc.serialize()),))
80
def stanza(self, _unused, node):
    """Called when the end tag of a direct child of the root
    element is encountered in the stream.

    Please note that `node` will be removed from the document and freed
    after this method returns. If it is needed after that, a copy must be
    made before the method returns.

    This default implementation only reports the unhandled stanza: it
    writes a single line containing the `repr` of the serialized node to
    standard error.

    :Parameters:
        - `_unused`: the document being parsed (not used here).
        - `node`: the (complete) element being processed
    :Types:
        - `_unused`: `libxml2.xmlDoc`
        - `node`: `libxml2.xmlNode`"""
    # repr() plus an explicit write emits the same text as the old
    # print-chevron/backtick form, but is valid syntax on Python 2 and 3.
    sys.stderr.write("Unhandled stanza %s\n" % (repr(node.serialize()),))
96
def error(self, descr):
    """Called when an error is encountered in the stream.

    This default implementation does not try to recover: it always
    raises `StreamParseError` carrying the description.

    :Parameters:
        - `descr`: description of the error
    :Types:
        - `descr`: `str`

    :raise StreamParseError: always."""
    # Call form instead of the "raise Class, value" statement, which is a
    # syntax error on Python 3; the raised exception is the same.
    raise StreamParseError(descr)
105
107 """Called when a warning is encountered in the stream.
108
109 :Parameters:
110 - `descr`: description of the warning
111 :Types:
112 - `descr`: `str`"""
113
114 if desc.startswith('xmlns: URI vcard-temp is not absolute'):
115 return
116
117 if desc.startswith('xmlns: http://www.xmpp.org/extensions/xep-0084.html#'):
118 return
119 logger.warning("XML STREAM WARNING: {0}".format(desc))
120
121 try:
122
123
124
125 from pyxmpp import _xmlextra
126 from pyxmpp._xmlextra import error
127
128 _create_reader = _xmlextra.sax_reader_new
129
131 """Replace namespaces in a whole subtree.
132
133 The old namespace declaration will be removed if present on the `node`.
134
135 :Parameters:
136 - `node`: the root of the subtree where namespaces should be replaced.
137 - `old_ns`: the namespace to replace.
138 - `new_ns`: the namespace to be used instead of old_ns.
139 :Types:
140 - `node`: `libxml2.xmlNode`
141 - `old_ns`: `libxml2.xmlNs`
142 - `new_ns`: `libxml2.xmlNs`
143
144 Both old_ns and new_ns may be None meaning no namespace set."""
145 if old_ns is None:
146 old_ns__o = None
147 else:
148 old_ns__o = old_ns._o
149 if new_ns is None:
150 new_ns__o = None
151 else:
152 new_ns__o = new_ns._o
153 if node is None:
154 node__o = None
155 else:
156 node__o = node._o
157 _xmlextra.replace_ns(node__o, old_ns__o, new_ns__o)
158 if old_ns__o:
159 _xmlextra.remove_ns(node__o, old_ns__o)
160
161 pure_python = False
162
163 except ImportError:
164
165
166
168 """Exception raised on a stream parse error."""
169 pass
170
172 """Escape data for XML"""
173 data=data.replace("&","&")
174 data=data.replace("<","<")
175 data=data.replace(">",">")
176 data=data.replace("'","'")
177 data=data.replace('"',""")
178 return data
179
181 """SAX events handler for the python-only stream parser."""
183 """Initialize the SAX handler.
184
185 :Parameters:
186 - `handler`: Object to handle stream start, end and stanzas.
187 :Types:
188 - `handler`: `StreamHandler`
189 """
190 self._handler = handler
191 self._head = ""
192 self._tail = ""
193 self._current = ""
194 self._level = 0
195 self._doc = None
196 self._root = None
197
199 ""
200 if self._level>1:
201 self._current += _escape(data)
202
204 ""
205 if self._level>1:
206 self._current += _escape(data)
207
211
215
217 ""
218 self._current+="</%s>" % (tag,)
219 self._level -= 1
220 if self._level > 1:
221 return
222 if self._level==1:
223 xml=self._head+self._current+self._tail
224 doc=libxml2.parseDoc(xml)
225 try:
226 node = doc.getRootElement().children
227 try:
228 node1 = node.docCopyNode(self._doc, 1)
229 try:
230 self._root.addChild(node1)
231 self._handler.stanza(self._doc, node1)
232 except Exception, e:
233 node1.unlinkNode()
234 node1.freeNode()
235 del node1
236 raise e
237 finally:
238 del node
239 finally:
240 doc.freeDoc()
241 else:
242 xml=self._head+self._tail
243 doc=libxml2.parseDoc(xml)
244 try:
245 self._handler.stream_end(self._doc)
246 self._doc.freeDoc()
247 self._doc = None
248 self._root = None
249 finally:
250 doc.freeDoc()
251
253 ""
254 self._handler.error(msg)
255
256 fatalError = error
257
258 ignorableWhitespace = characters
259
261 ""
262 self._current += "&" + name + ";"
263
267
269 ""
270 s = "<"+tag
271 if attrs:
272 for a,v in attrs.items():
273 s+=" %s='%s'" % (a,_escape(v))
274 s += ">"
275 if self._level == 0:
276 self._head = s
277 self._tail = "</%s>" % (tag,)
278 xml=self._head+self._tail
279 self._doc = libxml2.parseDoc(xml)
280 self._handler.stream_start(self._doc)
281 self._root = self._doc.getRootElement()
282 elif self._level == 1:
283 self._current = s
284 else:
285 self._current += s
286 self._level += 1
287
291
293 """Python-only stream reader."""
295 """Initialize the reader.
296
297 :Parameters:
298 - `handler`: Object to handle stream start, end and stanzas.
299 :Types:
300 - `handler`: `StreamHandler`
301 """
302 self.handler = handler
303 self.sax = _SAXCallback(handler)
304 self.parser = libxml2.createPushParser(self.sax, '', 0, 'stream')
305
def feed(self, data):
    """Push one chunk of data into the parser. Appropriate methods
    of `self.handler` will be called whenever something interesting is
    found.

    :Parameters:
        - `data`: the chunk of data to parse.
    :Types:
        - `data`: `str`

    :return: the value returned by the parser's `parseChunk` call."""
    parse_chunk = self.parser.parseChunk
    chunk_size = len(data)
    # NOTE(review): the trailing 0 is presumably libxml2's "terminate"
    # flag, i.e. more chunks may follow -- confirm against the bindings.
    return parse_chunk(data, chunk_size, 0)
316
317 _create_reader = _PythonReader
318
320 """Get namespace of node.
321
322 :return: the namespace object or `None` if the node has no namespace
323 assigned.
324 :returntype: `libxml2.xmlNs`"""
325 try:
326 return node.ns()
327 except libxml2.treeError:
328 return None
329
331 """Replace namespaces in a whole subtree.
332
333 :Parameters:
334 - `node`: the root of the subtree where namespaces should be replaced.
335 - `old_ns`: the namespace to replace.
336 - `new_ns`: the namespace to be used instead of old_ns.
337 :Types:
338 - `node`: `libxml2.xmlNode`
339 - `old_ns`: `libxml2.xmlNs`
340 - `new_ns`: `libxml2.xmlNs`
341
342 Both old_ns and new_ns may be None meaning no namespace set."""
343
344 if old_ns is not None:
345 old_ns_uri = old_ns.content
346 old_ns_prefix = old_ns.name
347 else:
348 old_ns_uri = None
349 old_ns_prefix = None
350
351 ns = _get_ns(node)
352 if ns is None and old_ns is None:
353 node.setNs(new_ns)
354 elif ns and ns.content == old_ns_uri and ns.name == old_ns_prefix:
355 node.setNs(new_ns)
356
357 p = node.properties
358 while p:
359 ns = _get_ns(p)
360 if ns is None and old_ns is None:
361 p.setNs(new_ns)
362 if ns and ns.content == old_ns_uri and ns.name == old_ns_prefix:
363 p.setNs(new_ns)
364 p = p.next
365
366 n = node.children
367 while n:
368 if n.type == 'element':
369 skip_element = False
370 try:
371 nsd = n.nsDefs()
372 except libxml2.treeError:
373 nsd = None
374 while nsd:
375 if nsd.name == old_ns_prefix:
376 skip_element = True
377 break
378 nsd = nsd.next
379 if not skip_element:
380 replace_ns(n, old_ns, new_ns)
381 n = n.next
382
383 pure_python = True
384
385
386
387
388
390 """Namespace of an XML node.
391
392 :Parameters:
393 - `xmlnode`: the XML node to query.
394 :Types:
395 - `xmlnode`: `libxml2.xmlNode`
396
397 :return: namespace of the node or `None`
398 :returntype: `libxml2.xmlNs`"""
399 try:
400 return xmlnode.ns()
401 except libxml2.treeError:
402 return None
403
405 """Return namespace URI of an XML node.
406
407 :Parameters:
408 - `xmlnode`: the XML node to query.
409 :Types:
410 - `xmlnode`: `libxml2.xmlNode`
411
412 :return: namespace URI of the node or `None`
413 :returntype: `unicode`"""
414 ns=get_node_ns(xmlnode)
415 if ns:
416 return unicode(ns.getContent(),"utf-8")
417 else:
418 return None
419
421 """Iterate over sibling XML nodes. All types of nodes will be returned
422 (not only the elements).
423
424 Usually used to iterate over node's children like this::
425
426 xml_node_iter(node.children)
427
428 :Parameters:
429 - `nodelist`: start node of the list.
430 :Types:
431 - `nodelist`: `libxml2.xmlNode`
432 """
433 node = nodelist
434 while node:
435 yield node
436 node = node.next
437
439 """Iterate over sibling XML elements. Non-element nodes will be skipped.
440
441 Usually used to iterate over node's children like this::
442
443 xml_node_iter(node.children)
444
445 :Parameters:
446 - `nodelist`: start node of the list.
447 :Types:
448 - `nodelist`: `libxml2.xmlNode`
449 """
450 node = nodelist
451 while node:
452 if node.type == "element":
453 yield node
454 node = node.next
455
457 """Iterate over sibling XML elements. Only elements in the given namespace will be returned.
458
459 Usually used to iterate over node's children like this::
460
461 xml_node_iter(node.children)
462
463 :Parameters:
464 - `nodelist`: start node of the list.
465 :Types:
466 - `nodelist`: `libxml2.xmlNode`
467 """
468 node = nodelist
469 while node:
470 if node.type == "element" and get_node_ns_uri(node)==ns_uri:
471 yield node
472 node = node.next
473
474 evil_characters_re=re.compile(r"[\000-\010\013\014\016-\037]",re.UNICODE)
475 utf8_replacement_char=u"\ufffd".encode("utf-8")
476
483
484 bad_nsdef_replace_re=re.compile(r"^([^<]*\<[^><]*\s+)(xmlns=((\"[^\"]*\")|(\'[^\']*\')))")
485
487 """Serialize an XML element making sure the result is sane.
488
489 Remove control characters and invalid namespace declarations from the
490 result string.
491
492 :Parameters:
493 - `xmlnode`: the XML element to serialize.
494 :Types:
495 - `xmlnode`: `libxml2.xmlNode`
496
497 :return: UTF-8 encoded serialized and sanitized element.
498 :returntype: `string`"""
499 try:
500 ns = xmlnode.ns()
501 except libxml2.treeError:
502 ns = None
503 try:
504 nsdef = xmlnode.nsDefs()
505 except libxml2.treeError:
506 nsdef = None
507 s=xmlnode.serialize(encoding="UTF-8")
508 while nsdef:
509 if nsdef.name is None and (not ns or (nsdef.name, nsdef.content)!=(ns.name, ns.content)):
510 s = bad_nsdef_replace_re.sub("\\1",s,1)
511 break
512 nsdef = nsdef.next
513 s=remove_evil_characters(s)
514 return s
515
517 """A simple push-parser interface for XML streams."""
519 """Initialize `StreamReader` object.
520
521 :Parameters:
522 - `handler`: handler object for the stream content
523 :Types:
524 - `handler`: `StreamHandler` derived class
525 """
526 self.reader=_create_reader(handler)
527 self.lock=threading.RLock()
528 self.in_use=0
530 """Get the document being parsed.
531
532 :return: the document.
533 :returntype: `libxml2.xmlDoc`"""
534 ret=self.reader.doc()
535 if ret:
536 return libxml2.xmlDoc(ret)
537 else:
538 return None
540 """Pass a string to the stream parser.
541
542 Parameters:
543 - `s`: string to parse.
544 Types:
545 - `s`: `str`
546
547 :return: `None` on EOF, `False` when whole input was parsed and `True`
548 if there is something still left in the buffer."""
549 self.lock.acquire()
550 if self.in_use:
551 self.lock.release()
552 raise StreamParseError,"StreamReader.feed() is not reentrant!"
553 self.in_use=1
554 try:
555 return self.reader.feed(s)
556 finally:
557 self.in_use=0
558 self.lock.release()
559
560
561
562