[976] | 1 | ###############################################################################
|
---|
| 2 | ###
|
---|
| 3 | ### Python DBAPI 2.0 Paramstyle Conversions
|
---|
| 4 | ###
|
---|
| 5 | ### Peter L. Buschman <plb@iotk.com>
|
---|
| 6 | ###
|
---|
| 7 | ### 2004-08-30
|
---|
| 8 | ###
|
---|
| 9 | ###############################################################################
|
---|
| 10 |
|
---|
| 11 | import string
|
---|
| 12 | import re
|
---|
| 13 |
|
---|
| 14 |
|
---|
| 15 | ##
|
---|
| 16 | ## Categorization of paramstyles into different categories. Different types of conversions
|
---|
| 17 | ## will have different optimal algorithms.
|
---|
| 18 | ##
|
---|
| 19 | PARAMSTYLES = {
|
---|
| 20 | 'all' : [ 'qmark', 'numeric', 'named', 'format', 'pyformat' ],
|
---|
| 21 | 'sequence' : [ 'qmark', 'numeric', 'format' ],
|
---|
| 22 | 'dict' : [ 'named', 'pyformat' ],
|
---|
| 23 | 'token' : [ 'qmark', 'format' ],
|
---|
| 24 | }
|
---|
| 25 |
|
---|
| 26 |
|
---|
| 27 | ##
|
---|
| 28 | ## Valid quote characters.
|
---|
| 29 | ##
|
---|
| 30 | QUOTE_CHARS = [ '"', "'" ] # " or '
|
---|
| 31 |
|
---|
| 32 |
|
---|
| 33 | ##
|
---|
| 34 | ## Valid escape characters.
|
---|
| 35 | ##
|
---|
| 36 | ESCAPE_CHARS = [ '\\' ]
|
---|
| 37 |
|
---|
| 38 |
|
---|
| 39 | ##
|
---|
| 40 | ## This dictionary is used to lookup the placeholder strings for paramstyles whose
|
---|
| 41 | ## placeholders are always the same. This is currently just qmark and format.
|
---|
| 42 | ##
|
---|
| 43 | PLACEHOLDER_TOKENS = {
|
---|
| 44 | 'qmark' : '?',
|
---|
| 45 | 'format' : '%s',
|
---|
| 46 | }
|
---|
| 47 |
|
---|
| 48 |
|
---|
| 49 | ##
|
---|
| 50 | ## This dictionary is used to look-up regular expressions for matching placeholders
|
---|
| 51 | ## in a query string for a given paramstyle.
|
---|
| 52 | ##
|
---|
| 53 | PLACEHOLDER_EXPS = {
|
---|
| 54 | 'qmark' : re.compile(r'(\?)'),
|
---|
| 55 | 'numeric' : re.compile(r'(:\d+)'),
|
---|
| 56 | 'named' : re.compile(r'(:\w+)'),
|
---|
| 57 | 'format' : re.compile(r'(%s)'),
|
---|
| 58 | 'pyformat' : re.compile(r'(%\(\w+\)s)'),
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 |
|
---|
| 62 | ##
|
---|
| 63 | ## This dictionary is used to get the correct datastructure to return params in when converting
|
---|
| 64 | ## from one paramstyle to another.
|
---|
| 65 | ##
|
---|
| 66 | PARAM_TYPES = {
|
---|
| 67 | 'qmark' : lambda : [],
|
---|
| 68 | 'numeric' : lambda : [],
|
---|
| 69 | 'named' : lambda : {},
|
---|
| 70 | 'format' : lambda : [],
|
---|
| 71 | 'pyformat' : lambda : {},
|
---|
| 72 | }
|
---|
| 73 |
|
---|
| 74 |
|
---|
| 75 | ##
|
---|
| 76 | ## Add a parameter to a sequence or dictionary of parameters.
|
---|
| 77 | ##
|
---|
| 78 | def param_add( param_num, param_name, param, params ):
|
---|
| 79 | try:
|
---|
| 80 | params.append(param)
|
---|
| 81 | except AttributeError:
|
---|
| 82 | params[param_name] = param
|
---|
| 83 | return params
|
---|
| 84 |
|
---|
| 85 |
|
---|
| 86 | ##
|
---|
| 87 | ## This dictionary is used to lookup regular expressions for matching the parameter
|
---|
| 88 | ## name contained within the placeholder of a named paramstyle. This is currently
|
---|
| 89 | ## just named and pyformat.
|
---|
| 90 | ##
|
---|
| 91 | PARAMNAME_EXPS = {
|
---|
| 92 | 'named' : re.compile(':(\w+)'),
|
---|
| 93 | 'pyformat' : re.compile('%\((\w+)\)s'),
|
---|
| 94 | }
|
---|
| 95 |
|
---|
| 96 |
|
---|
| 97 | ##
|
---|
| 98 | ## Parameter name generators.
|
---|
| 99 | ##
|
---|
| 100 | PARAMNAME_GENS = {
|
---|
| 101 | 'qmark' : lambda param_num, placeholder : 'param%d' % (param_num),
|
---|
| 102 | 'numeric' : lambda param_num, placeholder : 'param%d' % (param_num),
|
---|
| 103 | 'named' : lambda param_num, placeholder : PARAMNAME_EXPS['named'].findall(placeholder)[0],
|
---|
| 104 | 'format' : lambda param_num, placeholder : 'param%d' % (param_num),
|
---|
| 105 | 'pyformat' : lambda param_num, placeholder : PARAMNAME_EXPS['pyformat'].findall(placeholder)[0],
|
---|
| 106 | }
|
---|
| 107 |
|
---|
| 108 |
|
---|
| 109 | ##
|
---|
| 110 | ## Parameter value generators.
|
---|
| 111 | ##
|
---|
| 112 | PARAMVALUE_GENS = {
|
---|
| 113 | 'qmark' : lambda param_num, param_name, params : params[param_num - 1],
|
---|
| 114 | 'numeric' : lambda param_num, param_name, params : params[param_num - 1],
|
---|
| 115 | 'named' : lambda param_num, param_name, params : params[param_name],
|
---|
| 116 | 'format' : lambda param_num, param_name, params : params[param_num - 1],
|
---|
| 117 | 'pyformat' : lambda param_num, param_name, params : params[param_name],
|
---|
| 118 | }
|
---|
| 119 |
|
---|
| 120 |
|
---|
| 121 | ##
|
---|
| 122 | ## This dictionary contains lambda functions that return the appropriate replacement
|
---|
| 123 | ## string given the parameter's sequence number.
|
---|
| 124 | ##
|
---|
| 125 | PLACEHOLDER_SUBS = {
|
---|
| 126 | 'qmark' : lambda param_num, param_name : '?',
|
---|
| 127 | 'numeric' : lambda param_num, param_name : ':%d' % (param_num),
|
---|
| 128 | 'named' : lambda param_num, param_name : ':%s' % (param_name),
|
---|
| 129 | 'format' : lambda param_num, param_name : '%s',
|
---|
| 130 | 'pyformat' : lambda param_num, param_name : '%%(%s)s' % (param_name),
|
---|
| 131 | }
|
---|
| 132 |
|
---|
| 133 |
|
---|
| 134 | ##
|
---|
| 135 | ## The following conversion matrix was inspired by similar code in Wichert Akkerman's dhm
|
---|
| 136 | ## module at http://www.wiggy.net/code/python-dhm.
|
---|
| 137 | ##
|
---|
| 138 | ## The primary reason for listing conversion algorithms in a lookup table like this is that
|
---|
| 139 | ## it allows for individual conversions to be overridden as additional, possibly experimental,
|
---|
| 140 | ## algorithms are developed without breaking the functionality of the entire module.
|
---|
| 141 | ##
|
---|
| 142 | CONVERSION_MATRIX = {
|
---|
| 143 | 'qmark' : {
|
---|
| 144 | 'qmark' : lambda query, params : (query, params),
|
---|
| 145 | 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'numeric', query, params ),
|
---|
| 146 | 'named' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'named', query, params ),
|
---|
| 147 | 'format' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'format', query, params ),
|
---|
| 148 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'pyformat', query, params ),
|
---|
| 149 | },
|
---|
| 150 | 'numeric' : {
|
---|
| 151 | 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'qmark', query, params ),
|
---|
| 152 | 'numeric' : lambda query, params : (query, params),
|
---|
| 153 | 'named' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'named', query, params ),
|
---|
| 154 | 'format' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'format', query, params ),
|
---|
| 155 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'pyformat', query, params ),
|
---|
| 156 | },
|
---|
| 157 | 'named' : {
|
---|
| 158 | 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'named', 'qmark', query, params ),
|
---|
| 159 | 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'named', 'numeric', query, params ),
|
---|
| 160 | 'named' : lambda query, params : (query, params),
|
---|
| 161 | 'format' : lambda query, params : paramstyle_to_paramstyle( 'named', 'format', query, params ),
|
---|
| 162 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'named', 'pyformat', query, params ),
|
---|
| 163 | },
|
---|
| 164 | 'format' : {
|
---|
| 165 | 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'format', 'qmark', query, params ),
|
---|
| 166 | 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'format', 'numeric', query, params ),
|
---|
| 167 | 'named' : lambda query, params : paramstyle_to_paramstyle( 'format', 'named', query, params ),
|
---|
| 168 | 'format' : lambda query, params : (query, params),
|
---|
| 169 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'format', 'pyformat', query, params ),
|
---|
| 170 | },
|
---|
| 171 | 'pyformat' : {
|
---|
| 172 | 'qmark' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'qmark', query, params ),
|
---|
| 173 | 'numeric' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'numeric', query, params ),
|
---|
| 174 | 'named' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'named', query, params ),
|
---|
| 175 | 'format' : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'format', query, params ),
|
---|
| 176 | 'pyformat' : lambda query, params : (query, params),
|
---|
| 177 | },
|
---|
| 178 | }
|
---|
| 179 |
|
---|
| 180 | ##
|
---|
| 181 | ## Return True if the character at pos in string is escaped.
|
---|
| 182 | ##
|
---|
| 183 | def escaped( string, pos ):
|
---|
| 184 | escape_chars = ESCAPE_CHARS
|
---|
| 185 | count = 0
|
---|
| 186 | if pos > 0:
|
---|
| 187 | pos -= 1
|
---|
| 188 | if string[pos] in escape_chars:
|
---|
| 189 | escape_char = string[pos]
|
---|
| 190 | while string[pos] == escape_char and pos >= 0:
|
---|
| 191 | count += 1
|
---|
| 192 | pos -= 1
|
---|
| 193 | if count % 2 == 1:
|
---|
| 194 | return True
|
---|
| 195 | else:
|
---|
| 196 | return False
|
---|
| 197 |
|
---|
| 198 | ##
|
---|
| 199 | ## Return True if the string is quoted.
|
---|
| 200 | ##
|
---|
| 201 | def quoted( string ):
|
---|
| 202 | if string[0] in QUOTE_CHARS and string[-1] == string[0]:
|
---|
| 203 | return True
|
---|
| 204 | else:
|
---|
| 205 | return False
|
---|
| 206 |
|
---|
| 207 | ##
|
---|
| 208 | ##
|
---|
| 209 | ##
|
---|
| 210 | class SegmentizeError(Exception):
|
---|
| 211 | """
|
---|
| 212 | Error associated with string segmentization.
|
---|
| 213 | """
|
---|
| 214 | pass
|
---|
| 215 |
|
---|
| 216 |
|
---|
| 217 | ##
|
---|
| 218 | ## Parse a string into quoted and non-quoted segments. We do this so that it is easy to tell
|
---|
| 219 | ## which segments of a string to look for placeholders in and which to ignore.
|
---|
| 220 | ##
|
---|
| 221 | def segmentize( string ):
|
---|
| 222 | """
|
---|
| 223 | Split a string into quoted and non-quoted segments.
|
---|
| 224 | """
|
---|
| 225 | quote_chars = QUOTE_CHARS
|
---|
| 226 | segments = []
|
---|
| 227 | current_segment = ''
|
---|
| 228 | previous_char = None
|
---|
| 229 | quote_char = None
|
---|
| 230 | quoted = False
|
---|
| 231 | pos = 0
|
---|
| 232 | for char in string:
|
---|
| 233 | if quoted:
|
---|
| 234 | if char == quote_char and not escaped( string, pos ):
|
---|
| 235 | current_segment += char
|
---|
| 236 | segments.append(current_segment)
|
---|
| 237 | current_segment = ''
|
---|
| 238 | previous_char = char
|
---|
| 239 | quoted = False
|
---|
| 240 | else:
|
---|
| 241 | current_segment += char
|
---|
| 242 | previous_char = char
|
---|
| 243 | elif not quoted:
|
---|
| 244 | if char in quote_chars and not escaped( string, pos ):
|
---|
| 245 | if current_segment != '':
|
---|
| 246 | segments.append(current_segment)
|
---|
| 247 | current_segment = ''
|
---|
| 248 | quoted = True
|
---|
| 249 | quote_char = char
|
---|
| 250 | current_segment += char
|
---|
| 251 | previous_char = char
|
---|
| 252 | else:
|
---|
| 253 | current_segment += char
|
---|
| 254 | previous_char = char
|
---|
| 255 | pos += 1
|
---|
| 256 | if current_segment != '':
|
---|
| 257 | segments.append(current_segment)
|
---|
| 258 | if quoted:
|
---|
| 259 | raise SegmentizeError, 'Unmatched quotes in string'
|
---|
| 260 |
|
---|
| 261 | return segments
|
---|
| 262 |
|
---|
| 263 |
|
---|
| 264 | ##
|
---|
| 265 | ## Universal paramstyle converter. This is the initial conversion algorithm supplied with PyDAL.
|
---|
| 266 | ## It is intended to complete, but may not offer optimal performance in all cases.
|
---|
| 267 | ##
|
---|
| 268 | ## --PLB 2004-09-08
|
---|
| 269 | ##
|
---|
| 270 | def paramstyle_to_paramstyle( from_paramstyle, to_paramstyle, query, params ):
|
---|
| 271 | placeholder_exp = PLACEHOLDER_EXPS[from_paramstyle]
|
---|
| 272 | placeholder_sub = PLACEHOLDER_SUBS[to_paramstyle]
|
---|
| 273 | paramname_gen = PARAMNAME_GENS[from_paramstyle]
|
---|
| 274 | paramvalue_gen = PARAMVALUE_GENS[from_paramstyle]
|
---|
| 275 | new_query = ''
|
---|
| 276 | segments = segmentize(query)
|
---|
| 277 | new_params = PARAM_TYPES[to_paramstyle]()
|
---|
| 278 | param_num = 0
|
---|
| 279 | for segment in segments:
|
---|
| 280 | #
|
---|
| 281 | # If the segment is a quoted string, do not check for placeholders.
|
---|
| 282 | #
|
---|
| 283 | if quoted(segment):
|
---|
| 284 | new_query += segment
|
---|
| 285 | else:
|
---|
| 286 | #
|
---|
| 287 | # ...otherwise, check for any placeholder matches.
|
---|
| 288 | #
|
---|
| 289 | pos = 0
|
---|
| 290 | match = placeholder_exp.search(segment, pos)
|
---|
| 291 | if match != None:
|
---|
| 292 | #
|
---|
| 293 | # If there are placeholders...
|
---|
| 294 | #
|
---|
| 295 | while match != None:
|
---|
| 296 | new_query += segment[pos:match.start()]
|
---|
| 297 | placeholder = segment[match.start():match.end()]
|
---|
| 298 | #
|
---|
| 299 | # Ignore the placeholder if it is escaped...
|
---|
| 300 | #
|
---|
| 301 | if escaped( segment, match.start() ):
|
---|
| 302 | new_query += placeholder
|
---|
| 303 | else:
|
---|
| 304 | #
|
---|
| 305 | # ...otherwise replace it.
|
---|
| 306 | #
|
---|
| 307 | param_num += 1
|
---|
| 308 | param_name = paramname_gen( param_num, placeholder )
|
---|
| 309 | param_value = paramvalue_gen( param_num, param_name, params )
|
---|
| 310 | new_placeholder = placeholder_sub( param_num, param_name )
|
---|
| 311 | new_query += new_placeholder
|
---|
| 312 | new_params = param_add( param_num, param_name, param_value, new_params )
|
---|
| 313 | pos = match.end()
|
---|
| 314 | match = placeholder_exp.search(segment, pos)
|
---|
| 315 | #
|
---|
| 316 | # Tack on the end of the string segment when there are no more matches.
|
---|
| 317 | #
|
---|
| 318 | if pos < len(segment):
|
---|
| 319 | new_query += segment[pos:]
|
---|
| 320 | #
|
---|
| 321 | # If there were no placeholders, just add the segment to our query.
|
---|
| 322 | #
|
---|
| 323 | else:
|
---|
| 324 | new_query += segment
|
---|
| 325 |
|
---|
| 326 | return new_query, new_params
|
---|
| 327 |
|
---|
| 328 |
|
---|
| 329 | ##
|
---|
| 330 | ## Convert from any paramstyle to any other paramstyle.
|
---|
| 331 | ##
|
---|
| 332 | def convert( from_paramstyle, to_paramstyle, query, params ):
|
---|
| 333 |
|
---|
| 334 | try:
|
---|
| 335 | convert_function = CONVERSION_MATRIX[from_paramstyle][to_paramstyle]
|
---|
| 336 | except KeyError:
|
---|
| 337 | raise NotImplementedError, 'Unsupported paramstyle conversion: %s to %s' % (from_paramstyle, to_paramstyle)
|
---|
| 338 |
|
---|
| 339 | new_query, new_params = convert_function(query, params)
|
---|
| 340 |
|
---|
| 341 | return new_query, new_params
|
---|
| 342 |
|
---|
| 343 | ##
|
---|
| 344 | ## Unit Tests
|
---|
| 345 | ##
|
---|
| 346 | ## Need to move these to the python unit testing framework...
|
---|
| 347 | ##
|
---|
| 348 | if __name__ == '__main__':
|
---|
| 349 | sequence_params = ['a', 'b', 'c', 'd']
|
---|
| 350 | dict_params = {
|
---|
| 351 | 'foo' : 'a',
|
---|
| 352 | 'bar' : 'b',
|
---|
| 353 | 'baz' : 'c',
|
---|
| 354 | 'quux' : 'd',
|
---|
| 355 | }
|
---|
| 356 | tests = {
|
---|
| 357 | 'qmark' : [ 'SELECT * FROM ? WHERE ? > ? OR ? IS NOT NULL', sequence_params ],
|
---|
| 358 | 'numeric' : [ 'SELECT * FROM :1 WHERE :2 > :3 OR :4 IS NOT NULL', sequence_params ],
|
---|
| 359 | 'named' : [ 'SELECT * FROM :foo WHERE :bar > :baz OR :quux IS NOT NULL', dict_params ],
|
---|
| 360 | 'format' : [ 'SELECT * FROM %s WHERE %s > %s OR %s IS NOT NULL', sequence_params ],
|
---|
| 361 | 'pyformat' : [ 'SELECT * FROM %(foo)s WHERE %(bar)s > %(baz)s OR %(quux)s IS NOT NULL', dict_params ],
|
---|
| 362 | }
|
---|
| 363 | indent = 4
|
---|
| 364 | width = 16
|
---|
| 365 | print ''
|
---|
| 366 | print '[ PARAMSTYLE TRANSLATIONS ]'
|
---|
| 367 | print ''
|
---|
| 368 | for from_paramstyle in PARAMSTYLES['all']:
|
---|
| 369 | query = tests[from_paramstyle][0]
|
---|
| 370 | params = tests[from_paramstyle][1]
|
---|
| 371 | print ''
|
---|
| 372 | print '%s[ %s ]' % (' ' * indent, from_paramstyle.upper())
|
---|
| 373 | print ''
|
---|
| 374 | label = 'query'
|
---|
| 375 | print '%s%s%s: %s' % (' ' * indent, label, '.' * (width + indent - len(label)), query)
|
---|
| 376 | label = 'paramstyle'
|
---|
| 377 | print '%s%s%s: %s' % (' ' * indent, label, '.' * (width + indent - len(label)), from_paramstyle)
|
---|
| 378 | print ''
|
---|
| 379 | for to_paramstyle in PARAMSTYLES['all']:
|
---|
| 380 | converted_query, converted_params = convert(from_paramstyle, to_paramstyle, query, params)
|
---|
| 381 | label = '%s_query' % (to_paramstyle)
|
---|
| 382 | print '%s%s%s: %s' % (' ' * indent * 2, label, '.' * (width - len(label)), converted_query)
|
---|
| 383 | label = '%s_params' % (to_paramstyle)
|
---|
| 384 | print '%s%s%s: %s' % (' ' * indent * 2, label, '.' * (width - len(label)), converted_params)
|
---|
| 385 | print ''
|
---|
| 386 |
|
---|