Package pyxmpp :: Module xmppstringprep
[hide private]

Source Code for Module pyxmpp.xmppstringprep

  1  # 
  2  # (C) Copyright 2003-2010 Jacek Konieczny <jajcus@jajcus.net> 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify 
  5  # it under the terms of the GNU Lesser General Public License Version 
  6  # 2.1 as published by the Free Software Foundation. 
  7  # 
  8  # This program is distributed in the hope that it will be useful, 
  9  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 11  # GNU Lesser General Public License for more details. 
 12  # 
 13  # You should have received a copy of the GNU Lesser General Public 
 14  # License along with this program; if not, write to the Free Software 
 15  # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 16  # 
 17  # pylint treats "import stringprep" like deprecated "import string" 
 18  # pylint: disable-msg=W0402 
 19   
 20  """Nodeprep and resourceprep stringprep profiles. 
 21   
 22  Normative reference: 
 23    - `RFC 3920 <http://www.ietf.org/rfc/rfc3920.txt>`__ 
 24  """ 
 25   
 26  __docformat__="restructuredtext en" 
 27   
 28  import stringprep 
 29  import unicodedata 
 30  from pyxmpp.exceptions import StringprepError 
 31   
class LookupFunction:
    """Adapter that exposes a plain function as an RFC 3454 table lookup.

    :Ivariables:
        - `lookup`: the wrapped lookup function.
    """

    def __init__(self, function):
        """Wrap `function` so it can be used wherever a table is expected.

        :Parameters:
            - `function`: callable taking a character as input and returning
              either a `bool` value or the mapped value for that character.
        """
        # Kept as an instance attribute, so `table.lookup(c)` calls the
        # function directly, without an implicit `self` argument.
        self.lookup = function
44
class LookupTable:
    """Class for looking up RFC 3454 tables using a dictionary and/or a list
    of ranges.

    :Ivariables:
        - `singles`: dictionary mapping single characters to values.
        - `ranges`: sequence of ``((start,end),value)`` tuples.
    """

    def __init__(self, singles, ranges):
        """Initialize `LookupTable` object.

        :Parameters:
            - `singles`: dictionary mapping Unicode characters into other
              Unicode characters (or any other lookup value).
            - `ranges`: list of ``((start,end),value)`` tuples mapping codes
              in the inclusive range (start,end) to the value.  The list must
              be sorted by `start`, because `lookup` stops scanning at the
              first range that begins above the code being looked up.
        """
        self.singles = singles
        self.ranges = ranges

    def lookup(self, c):
        """Do Unicode character lookup.

        :Parameters:
            - `c`: Unicode character to look up.

        :return: the mapped value, or `None` when `c` is neither in
            `singles` nor covered by any of the `ranges`.
        """
        # `in` instead of dict.has_key(): has_key() is deprecated in
        # Python 2 and does not exist in Python 3.
        if c in self.singles:
            return self.singles[c]
        code = ord(c)
        for (start, end), value in self.ranges:
            if code < start:
                # Ranges are sorted: no later range can contain the code.
                break
            if code <= end:
                return value
        return None

# RFC 3454 table A.1, looked up through the standard `stringprep` module.
A_1 = LookupFunction(stringprep.in_table_a1)

def b1_mapping(uc):
    """Map a character according to RFC 3454 table B.1.

    :Parameters:
        - `uc`: Unicode character to map.

    :returns: u"" if `uc` is listed in the table, `None` otherwise.
    """
    # Table membership is turned into a mapping result: a listed character
    # maps to the empty string, anything else has no mapping (None).
    return u"" if stringprep.in_table_b1(uc) else None

# RFC 3454 "B" tables: each lookup returns the mapped value for a character.
B_1 = LookupFunction(b1_mapping)
B_2 = LookupFunction(stringprep.map_table_b2)
B_3 = LookupFunction(stringprep.map_table_b3)

# RFC 3454 "C" tables: each lookup tells whether a character is in the table.
C_1_1 = LookupFunction(stringprep.in_table_c11)
C_1_2 = LookupFunction(stringprep.in_table_c12)
C_2_1 = LookupFunction(stringprep.in_table_c21)
C_2_2 = LookupFunction(stringprep.in_table_c22)
C_3 = LookupFunction(stringprep.in_table_c3)
C_4 = LookupFunction(stringprep.in_table_c4)
C_5 = LookupFunction(stringprep.in_table_c5)
C_6 = LookupFunction(stringprep.in_table_c6)
C_7 = LookupFunction(stringprep.in_table_c7)
C_8 = LookupFunction(stringprep.in_table_c8)
C_9 = LookupFunction(stringprep.in_table_c9)

# RFC 3454 "D" tables used by the bidirectional check:
# D.1 is RandALCat, D.2 is LCat.
D_1 = LookupFunction(stringprep.in_table_d1)
D_2 = LookupFunction(stringprep.in_table_d2)

def nfkc(data):
    """Do NFKC normalization of Unicode data.

    :Parameters:
        - `data`: Unicode string, or a list/tuple of Unicode strings
          (typically single characters) to be concatenated first.

    :return: normalized Unicode string.
    """
    # isinstance() rather than an exact type comparison, so that list
    # subclasses and tuples of characters are joined as well.
    if isinstance(data, (list, tuple)):
        data = u"".join(data)
    return unicodedata.normalize("NFKC", data)
115
class Profile:
    """Base class for stringprep profiles.

    A profile applies, in order: character mapping, optional normalization,
    the prohibited-character check, (for stored strings only) the
    unassigned-code check and the optional bidirectional check.

    :Cvariables:
        - `cache_items`: list of ``(profile, key)`` pairs, oldest first,
          recording every entry stored in any profile's `cache`.  It is a
          class attribute, so the size limit covers all profiles together.

    :Ivariables:
        - `unassigned`: lookup tables with unassigned codes.
        - `mapping`: lookup tables with character mappings.
        - `normalization`: the normalization function or a false value.
        - `prohibited`: lookup tables with prohibited characters.
        - `bidi`: true if bidirectional checks should be done.
        - `cache`: dictionary mapping input strings to prepared strings.
    """
    cache_items = []

    def __init__(self, unassigned, mapping, normalization, prohibited, bidi=1):
        """Initialize Profile object.

        :Parameters:
            - `unassigned`: sequence of lookup tables with unassigned codes
            - `mapping`: sequence of lookup tables with character mappings
            - `normalization`: the normalization function (or a false value
              to skip normalization)
            - `prohibited`: sequence of lookup tables with prohibited
              characters
            - `bidi`: if True then bidirectional checks should be done
        """
        self.unassigned = unassigned
        self.mapping = mapping
        self.normalization = normalization
        self.prohibited = prohibited
        self.bidi = bidi
        self.cache = {}

    def prepare(self, data):
        """Complete string preparation procedure for 'stored' strings.
        (includes checks for unassigned codes)

        Results are cached per profile; the total number of cached entries
        is limited by the module-level `stringprep_cache_size`.

        :Parameters:
            - `data`: Unicode string to prepare.

        :return: prepared string

        :raise StringprepError: if the preparation fails
        """
        cached = self.cache.get(data)
        if cached is not None:
            return cached
        s = self.map(data)
        if self.normalization:
            s = self.normalization(s)
        s = self.prohibit(s)
        s = self.check_unassigned(s)
        if self.bidi:
            s = self.check_bidi(s)
        if isinstance(s, list):
            # Without normalization `map` leaves us with a list of pieces.
            s = u"".join(s)
        limit = stringprep_cache_size
        if limit <= 0:
            # A non-positive limit disables caching; storing the result would
            # let the cache grow without any bound.
            return s
        items = self.cache_items
        if len(items) >= limit:
            # Cache is full: drop the oldest entries, keeping the newest half
            # (rounded up).  Integer arithmetic keeps the slice index valid
            # on both Python 2 and Python 3.
            keep = (limit + 1) // 2
            for profile, key in items[:-keep]:
                profile.cache.pop(key, None)
            del items[:-keep]
        items.append((self, data))
        self.cache[data] = s
        return s

    def prepare_query(self, s):
        """Complete string preparation procedure for 'query' strings.
        (without checks for unassigned codes)

        The result is not cached.

        :Parameters:
            - `s`: Unicode string to prepare.

        :return: prepared string

        :raise StringprepError: if the preparation fails
        """
        s = self.map(s)
        if self.normalization:
            s = self.normalization(s)
        s = self.prohibit(s)
        if self.bidi:
            s = self.check_bidi(s)
        if isinstance(s, list):
            # Without normalization `map` leaves us with a list of pieces.
            s = u"".join(s)
        return s

    def map(self, s):
        """Mapping part of string preparation.

        :Parameters:
            - `s`: Unicode string (or sequence of characters) to map.

        :return: list with one entry per input character: the value from
            the first mapping table that knows the character, or the
            character itself when no table maps it.
        """
        result = []
        for c in s:
            mapped = None
            for table in self.mapping:
                mapped = table.lookup(c)
                if mapped is not None:
                    break
            result.append(c if mapped is None else mapped)
        return result

    def prohibit(self, s):
        """Checks for prohibited characters.

        :Parameters:
            - `s`: string or sequence of characters to check.

        :return: `s`, unchanged.

        :raise StringprepError: if any character is found in one of the
            `prohibited` tables.
        """
        for c in s:
            for table in self.prohibited:
                if table.lookup(c):
                    raise StringprepError("Prohibited character: %r" % (c,))
        return s

    def check_unassigned(self, s):
        """Checks for unassigned character codes.

        :Parameters:
            - `s`: string or sequence of characters to check.

        :return: `s`, unchanged.

        :raise StringprepError: if any character is found in one of the
            `unassigned` tables.
        """
        for c in s:
            for table in self.unassigned:
                if table.lookup(c):
                    raise StringprepError("Unassigned character: %r" % (c,))
        return s

    def check_bidi(self, s):
        """Checks if string is valid for bidirectional printing.

        Implements the rules of RFC 3454, section 6: RandALCat (table D.1)
        and LCat (table D.2) characters must not be mixed, and a string
        containing any RandALCat character must both start and end with one.

        :Parameters:
            - `s`: string or sequence of characters to check.

        :return: `s`, unchanged.

        :raise StringprepError: if one of the rules is violated.
        """
        has_l = False
        has_ral = False
        for c in s:
            if D_1.lookup(c):
                has_ral = True
            elif D_2.lookup(c):
                has_l = True
        if has_l and has_ral:
            raise StringprepError("Both RandALCat and LCat characters present")
        # The rule applies to strings with RandALCat characters, and the
        # table lookup returns a boolean, so it is tested for truth rather
        # than compared with None.
        if has_ral and not (D_1.lookup(s[0]) and D_1.lookup(s[-1])):
            raise StringprepError("The first and the last character must be RandALCat")
        return s

# Upper bound on the number of prepared strings cached by all profiles
# together; change it with set_stringprep_cache_size().
stringprep_cache_size = 1000

# Profile for the node part of a JID.  On top of the RFC 3454 tables it
# prohibits the characters " & ' / : < > @ through an extra LookupTable.
nodeprep = Profile(
    unassigned=(A_1,),
    mapping=(B_1, B_2),
    normalization=nfkc,
    prohibited=(
        C_1_1, C_1_2, C_2_1, C_2_2, C_3, C_4, C_5, C_6, C_7, C_8, C_9,
        LookupTable(
            {
                u'"': True,
                u'&': True,
                u"'": True,
                u"/": True,
                u":": True,
                u"<": True,
                u">": True,
                u"@": True,
            },
            (),
        ),
    ),
    bidi=1,
)

# Profile for the resource part of a JID: only the B.1 mapping is applied
# and table C.1.1 is not in the prohibited set.
resourceprep = Profile(
    unassigned=(A_1,),
    mapping=(B_1,),
    normalization=nfkc,
    prohibited=(C_1_2, C_2_1, C_2_2, C_3, C_4, C_5, C_6, C_7, C_8, C_9),
    bidi=1,
)

def set_stringprep_cache_size(size):
    """Modify stringprep cache size.

    Sets the module-level `stringprep_cache_size` and, when more than `size`
    entries are currently cached, drops the oldest ones so that at most
    `size` remain.  A `size` of zero (or less) empties the cache.

    :Parameters:
        - `size`: new cache size
    """
    global stringprep_cache_size
    stringprep_cache_size = size
    # Count the surplus explicitly instead of slicing with `-size`:
    # for size == 0 the slices `[:-0]` and `[-0:]` would remove nothing and
    # keep everything.
    excess = len(Profile.cache_items) - size
    if excess > 0:
        for profile, key in Profile.cache_items[:excess]:
            # The key may already be gone from that profile's cache.
            profile.cache.pop(key, None)
        del Profile.cache_items[:excess]
271 272 # vi: sts=4 et sw=4 273