| 1 | ############################################################################### | 
|---|
| 2 | ### | 
|---|
| 3 | ### Python DBAPI 2.0 Paramstyle Conversions | 
|---|
| 4 | ### | 
|---|
| 5 | ### Peter L. Buschman <plb@iotk.com> | 
|---|
| 6 | ### | 
|---|
| 7 | ### 2004-08-30 | 
|---|
| 8 | ### | 
|---|
| 9 | ############################################################################### | 
|---|
| 10 |  | 
|---|
| 11 | import string | 
|---|
| 12 | import re | 
|---|
| 13 |  | 
|---|
| 14 |  | 
|---|
| 15 | ## | 
|---|
| 16 | ## Categorization of paramstyles into different categories.  Different types of conversions | 
|---|
| 17 | ## will have different optimal algorithms. | 
|---|
| 18 | ## | 
|---|
| 19 | PARAMSTYLES    = { | 
|---|
| 20 | 'all'      : [ 'qmark', 'numeric', 'named', 'format', 'pyformat' ], | 
|---|
| 21 | 'sequence' : [ 'qmark', 'numeric', 'format' ], | 
|---|
| 22 | 'dict'     : [ 'named', 'pyformat' ], | 
|---|
| 23 | 'token'    : [ 'qmark', 'format' ], | 
|---|
| 24 | } | 
|---|
| 25 |  | 
|---|
| 26 |  | 
|---|
| 27 | ## | 
|---|
| 28 | ## Valid quote characters. | 
|---|
| 29 | ## | 
|---|
| 30 | QUOTE_CHARS = [ '"', "'" ]  # " or ' | 
|---|
| 31 |  | 
|---|
| 32 |  | 
|---|
| 33 | ## | 
|---|
| 34 | ## Valid escape characters. | 
|---|
| 35 | ## | 
|---|
| 36 | ESCAPE_CHARS = [ '\\' ] | 
|---|
| 37 |  | 
|---|
| 38 |  | 
|---|
| 39 | ## | 
|---|
| 40 | ## This dictionary is used to lookup the placeholder strings for paramstyles whose | 
|---|
| 41 | ## placeholders are always the same.  This is currently just qmark and format. | 
|---|
| 42 | ## | 
|---|
| 43 | PLACEHOLDER_TOKENS = { | 
|---|
| 44 | 'qmark'    : '?', | 
|---|
| 45 | 'format'   : '%s', | 
|---|
| 46 | } | 
|---|
| 47 |  | 
|---|
| 48 |  | 
|---|
| 49 | ## | 
|---|
| 50 | ## This dictionary is used to look-up regular expressions for matching placeholders | 
|---|
| 51 | ## in a query string for a given paramstyle. | 
|---|
| 52 | ## | 
|---|
| 53 | PLACEHOLDER_EXPS = { | 
|---|
| 54 | 'qmark'    : re.compile(r'(\?)'), | 
|---|
| 55 | 'numeric'  : re.compile(r'(:\d+)'), | 
|---|
| 56 | 'named'    : re.compile(r'(:\w+)'), | 
|---|
| 57 | 'format'   : re.compile(r'(%s)'), | 
|---|
| 58 | 'pyformat' : re.compile(r'(%\(\w+\)s)'), | 
|---|
| 59 | } | 
|---|
| 60 |  | 
|---|
| 61 |  | 
|---|
| 62 | ## | 
|---|
| 63 | ## This dictionary is used to get the correct datastructure to return params in when converting | 
|---|
| 64 | ## from one paramstyle to another. | 
|---|
| 65 | ## | 
|---|
| 66 | PARAM_TYPES = { | 
|---|
| 67 | 'qmark'    : lambda : [], | 
|---|
| 68 | 'numeric'  : lambda : [], | 
|---|
| 69 | 'named'    : lambda : {}, | 
|---|
| 70 | 'format'   : lambda : [], | 
|---|
| 71 | 'pyformat' : lambda : {}, | 
|---|
| 72 | } | 
|---|
| 73 |  | 
|---|
| 74 |  | 
|---|
| 75 | ## | 
|---|
| 76 | ## Add a parameter to a sequence or dictionary of parameters. | 
|---|
| 77 | ## | 
|---|
| 78 | def param_add( param_num, param_name, param, params ): | 
|---|
| 79 | try: | 
|---|
| 80 | params.append(param) | 
|---|
| 81 | except AttributeError: | 
|---|
| 82 | params[param_name] = param | 
|---|
| 83 | return params | 
|---|
| 84 |  | 
|---|
| 85 |  | 
|---|
| 86 | ## | 
|---|
| 87 | ## This dictionary is used to lookup regular expressions for matching the parameter | 
|---|
| 88 | ## name contained within the placeholder of a named paramstyle.  This is currently | 
|---|
| 89 | ## just named and pyformat. | 
|---|
| 90 | ## | 
|---|
| 91 | PARAMNAME_EXPS = { | 
|---|
| 92 | 'named'    : re.compile(':(\w+)'), | 
|---|
| 93 | 'pyformat' : re.compile('%\((\w+)\)s'), | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 |  | 
|---|
| 97 | ## | 
|---|
| 98 | ## Parameter name generators. | 
|---|
| 99 | ## | 
|---|
| 100 | PARAMNAME_GENS = { | 
|---|
| 101 | 'qmark'    : lambda param_num, placeholder : 'param%d' % (param_num), | 
|---|
| 102 | 'numeric'  : lambda param_num, placeholder : 'param%d' % (param_num), | 
|---|
| 103 | 'named'    : lambda param_num, placeholder : PARAMNAME_EXPS['named'].findall(placeholder)[0], | 
|---|
| 104 | 'format'   : lambda param_num, placeholder : 'param%d' % (param_num), | 
|---|
| 105 | 'pyformat' : lambda param_num, placeholder : PARAMNAME_EXPS['pyformat'].findall(placeholder)[0], | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 |  | 
|---|
| 109 | ## | 
|---|
| 110 | ## Parameter value generators. | 
|---|
| 111 | ## | 
|---|
| 112 | PARAMVALUE_GENS = { | 
|---|
| 113 | 'qmark'    : lambda param_num, param_name, params : params[param_num - 1], | 
|---|
| 114 | 'numeric'  : lambda param_num, param_name, params : params[param_num - 1], | 
|---|
| 115 | 'named'    : lambda param_num, param_name, params : params[param_name], | 
|---|
| 116 | 'format'   : lambda param_num, param_name, params : params[param_num - 1], | 
|---|
| 117 | 'pyformat' : lambda param_num, param_name, params : params[param_name], | 
|---|
| 118 | } | 
|---|
| 119 |  | 
|---|
| 120 |  | 
|---|
| 121 | ## | 
|---|
| 122 | ## This dictionary contains lambda functions that return the appropriate replacement | 
|---|
| 123 | ## string given the parameter's sequence number. | 
|---|
| 124 | ## | 
|---|
| 125 | PLACEHOLDER_SUBS = { | 
|---|
| 126 | 'qmark'     : lambda param_num, param_name : '?', | 
|---|
| 127 | 'numeric'   : lambda param_num, param_name : ':%d' % (param_num), | 
|---|
| 128 | 'named'     : lambda param_num, param_name : ':%s' % (param_name), | 
|---|
| 129 | 'format'    : lambda param_num, param_name : '%s', | 
|---|
| 130 | 'pyformat'  : lambda param_num, param_name : '%%(%s)s' % (param_name), | 
|---|
| 131 | } | 
|---|
| 132 |  | 
|---|
| 133 |  | 
|---|
| 134 | ## | 
|---|
| 135 | ## The following conversion matrix was inspired by similar code in Wichert Akkerman's dhm | 
|---|
| 136 | ## module at http://www.wiggy.net/code/python-dhm. | 
|---|
| 137 | ## | 
|---|
| 138 | ## The primary reason for listing conversion algorithms in a lookup table like this is that | 
|---|
| 139 | ## it allows for individual conversions to be overridden as additional, possibly experimental, | 
|---|
| 140 | ## algorithms are developed without breaking the functionality of the entire module. | 
|---|
| 141 | ## | 
|---|
| 142 | CONVERSION_MATRIX = { | 
|---|
| 143 | 'qmark' : { | 
|---|
| 144 | 'qmark'    : lambda query, params : (query, params), | 
|---|
| 145 | 'numeric'  : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'numeric', query, params ), | 
|---|
| 146 | 'named'    : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'named', query, params ), | 
|---|
| 147 | 'format'   : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'format', query, params ), | 
|---|
| 148 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'qmark', 'pyformat', query, params ), | 
|---|
| 149 | }, | 
|---|
| 150 | 'numeric' : { | 
|---|
| 151 | 'qmark'    : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'qmark', query, params ), | 
|---|
| 152 | 'numeric'  : lambda query, params : (query, params), | 
|---|
| 153 | 'named'    : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'named', query, params ), | 
|---|
| 154 | 'format'   : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'format', query, params ), | 
|---|
| 155 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'numeric', 'pyformat', query, params ), | 
|---|
| 156 | }, | 
|---|
| 157 | 'named' : { | 
|---|
| 158 | 'qmark'    : lambda query, params : paramstyle_to_paramstyle( 'named', 'qmark', query, params ), | 
|---|
| 159 | 'numeric'  : lambda query, params : paramstyle_to_paramstyle( 'named', 'numeric', query, params ), | 
|---|
| 160 | 'named'    : lambda query, params : (query, params), | 
|---|
| 161 | 'format'   : lambda query, params : paramstyle_to_paramstyle( 'named', 'format', query, params ), | 
|---|
| 162 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'named', 'pyformat', query, params ), | 
|---|
| 163 | }, | 
|---|
| 164 | 'format' : { | 
|---|
| 165 | 'qmark'    : lambda query, params : paramstyle_to_paramstyle( 'format', 'qmark', query, params ), | 
|---|
| 166 | 'numeric'  : lambda query, params : paramstyle_to_paramstyle( 'format', 'numeric', query, params ), | 
|---|
| 167 | 'named'    : lambda query, params : paramstyle_to_paramstyle( 'format', 'named', query, params ), | 
|---|
| 168 | 'format'   : lambda query, params : (query, params), | 
|---|
| 169 | 'pyformat' : lambda query, params : paramstyle_to_paramstyle( 'format', 'pyformat', query, params ), | 
|---|
| 170 | }, | 
|---|
| 171 | 'pyformat' : { | 
|---|
| 172 | 'qmark'    : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'qmark', query, params ), | 
|---|
| 173 | 'numeric'  : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'numeric', query, params ), | 
|---|
| 174 | 'named'    : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'named', query, params ), | 
|---|
| 175 | 'format'   : lambda query, params : paramstyle_to_paramstyle( 'pyformat', 'format', query, params ), | 
|---|
| 176 | 'pyformat' : lambda query, params : (query, params), | 
|---|
| 177 | }, | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | ## | 
|---|
| 181 | ## Return True if the character at pos in string is escaped. | 
|---|
| 182 | ## | 
|---|
| 183 | def escaped( string, pos ): | 
|---|
| 184 | escape_chars = ESCAPE_CHARS | 
|---|
| 185 | count = 0 | 
|---|
| 186 | if pos > 0: | 
|---|
| 187 | pos -= 1 | 
|---|
| 188 | if string[pos] in escape_chars: | 
|---|
| 189 | escape_char = string[pos] | 
|---|
| 190 | while string[pos] == escape_char and pos >= 0: | 
|---|
| 191 | count += 1 | 
|---|
| 192 | pos -= 1 | 
|---|
| 193 | if count % 2 == 1: | 
|---|
| 194 | return True | 
|---|
| 195 | else: | 
|---|
| 196 | return False | 
|---|
| 197 |  | 
|---|
| 198 | ## | 
|---|
| 199 | ## Return True if the string is quoted. | 
|---|
| 200 | ## | 
|---|
| 201 | def quoted( string ): | 
|---|
| 202 | if string[0] in QUOTE_CHARS and string[-1] == string[0]: | 
|---|
| 203 | return True | 
|---|
| 204 | else: | 
|---|
| 205 | return False | 
|---|
| 206 |  | 
|---|
| 207 | ## | 
|---|
| 208 | ## | 
|---|
| 209 | ## | 
|---|
| 210 | class SegmentizeError(Exception): | 
|---|
| 211 | """ | 
|---|
| 212 | Error associated with string segmentization. | 
|---|
| 213 | """ | 
|---|
| 214 | pass | 
|---|
| 215 |  | 
|---|
| 216 |  | 
|---|
| 217 | ## | 
|---|
| 218 | ## Parse a string into quoted and non-quoted segments.  We do this so that it is easy to tell | 
|---|
| 219 | ## which segments of a string to look for placeholders in and which to ignore. | 
|---|
| 220 | ## | 
|---|
| 221 | def segmentize( string ): | 
|---|
| 222 | """ | 
|---|
| 223 | Split a string into quoted and non-quoted segments. | 
|---|
| 224 | """ | 
|---|
| 225 | quote_chars     = QUOTE_CHARS | 
|---|
| 226 | segments        = [] | 
|---|
| 227 | current_segment = '' | 
|---|
| 228 | previous_char   = None | 
|---|
| 229 | quote_char      = None | 
|---|
| 230 | quoted          = False | 
|---|
| 231 | pos             = 0 | 
|---|
| 232 | for char in string: | 
|---|
| 233 | if quoted: | 
|---|
| 234 | if char == quote_char and not escaped( string, pos ): | 
|---|
| 235 | current_segment += char | 
|---|
| 236 | segments.append(current_segment) | 
|---|
| 237 | current_segment = '' | 
|---|
| 238 | previous_char = char | 
|---|
| 239 | quoted = False | 
|---|
| 240 | else: | 
|---|
| 241 | current_segment += char | 
|---|
| 242 | previous_char = char | 
|---|
| 243 | elif not quoted: | 
|---|
| 244 | if char in quote_chars and not escaped( string, pos ): | 
|---|
| 245 | if current_segment != '': | 
|---|
| 246 | segments.append(current_segment) | 
|---|
| 247 | current_segment = '' | 
|---|
| 248 | quoted = True | 
|---|
| 249 | quote_char = char | 
|---|
| 250 | current_segment += char | 
|---|
| 251 | previous_char = char | 
|---|
| 252 | else: | 
|---|
| 253 | current_segment += char | 
|---|
| 254 | previous_char = char | 
|---|
| 255 | pos += 1 | 
|---|
| 256 | if current_segment != '': | 
|---|
| 257 | segments.append(current_segment) | 
|---|
| 258 | if quoted: | 
|---|
| 259 | raise SegmentizeError, 'Unmatched quotes in string' | 
|---|
| 260 |  | 
|---|
| 261 | return segments | 
|---|
| 262 |  | 
|---|
| 263 |  | 
|---|
| 264 | ## | 
|---|
| 265 | ## Universal paramstyle converter.  This is the initial conversion algorithm supplied with PyDAL. | 
|---|
| 266 | ## It is intended to complete, but may not offer optimal performance in all cases. | 
|---|
| 267 | ## | 
|---|
| 268 | ## --PLB 2004-09-08 | 
|---|
| 269 | ## | 
|---|
| 270 | def paramstyle_to_paramstyle( from_paramstyle, to_paramstyle, query, params ): | 
|---|
| 271 | placeholder_exp = PLACEHOLDER_EXPS[from_paramstyle] | 
|---|
| 272 | placeholder_sub = PLACEHOLDER_SUBS[to_paramstyle] | 
|---|
| 273 | paramname_gen   = PARAMNAME_GENS[from_paramstyle] | 
|---|
| 274 | paramvalue_gen  = PARAMVALUE_GENS[from_paramstyle] | 
|---|
| 275 | new_query = '' | 
|---|
| 276 | segments = segmentize(query) | 
|---|
| 277 | new_params = PARAM_TYPES[to_paramstyle]() | 
|---|
| 278 | param_num = 0 | 
|---|
| 279 | for segment in segments: | 
|---|
| 280 | # | 
|---|
| 281 | # If the segment is a quoted string, do not check for placeholders. | 
|---|
| 282 | # | 
|---|
| 283 | if quoted(segment): | 
|---|
| 284 | new_query += segment | 
|---|
| 285 | else: | 
|---|
| 286 | # | 
|---|
| 287 | # ...otherwise, check for any placeholder matches. | 
|---|
| 288 | # | 
|---|
| 289 | pos = 0 | 
|---|
| 290 | match = placeholder_exp.search(segment, pos) | 
|---|
| 291 | if match != None: | 
|---|
| 292 | # | 
|---|
| 293 | # If there are placeholders... | 
|---|
| 294 | # | 
|---|
| 295 | while match != None: | 
|---|
| 296 | new_query += segment[pos:match.start()] | 
|---|
| 297 | placeholder = segment[match.start():match.end()] | 
|---|
| 298 | # | 
|---|
| 299 | # Ignore the placeholder if it is escaped... | 
|---|
| 300 | # | 
|---|
| 301 | if escaped( segment, match.start() ): | 
|---|
| 302 | new_query += placeholder | 
|---|
| 303 | else: | 
|---|
| 304 | # | 
|---|
| 305 | # ...otherwise replace it. | 
|---|
| 306 | # | 
|---|
| 307 | param_num += 1 | 
|---|
| 308 | param_name  = paramname_gen( param_num, placeholder ) | 
|---|
| 309 | param_value = paramvalue_gen( param_num, param_name, params ) | 
|---|
| 310 | new_placeholder = placeholder_sub( param_num, param_name ) | 
|---|
| 311 | new_query += new_placeholder | 
|---|
| 312 | new_params = param_add( param_num, param_name, param_value, new_params ) | 
|---|
| 313 | pos = match.end() | 
|---|
| 314 | match = placeholder_exp.search(segment, pos) | 
|---|
| 315 | # | 
|---|
| 316 | # Tack on the end of the string segment when there are no more matches. | 
|---|
| 317 | # | 
|---|
| 318 | if pos < len(segment): | 
|---|
| 319 | new_query += segment[pos:] | 
|---|
| 320 | # | 
|---|
| 321 | # If there were no placeholders, just add the segment to our query. | 
|---|
| 322 | # | 
|---|
| 323 | else: | 
|---|
| 324 | new_query += segment | 
|---|
| 325 |  | 
|---|
| 326 | return new_query, new_params | 
|---|
| 327 |  | 
|---|
| 328 |  | 
|---|
| 329 | ## | 
|---|
| 330 | ## Convert from any paramstyle to any other paramstyle. | 
|---|
| 331 | ## | 
|---|
| 332 | def convert( from_paramstyle, to_paramstyle, query, params ): | 
|---|
| 333 |  | 
|---|
| 334 | try: | 
|---|
| 335 | convert_function = CONVERSION_MATRIX[from_paramstyle][to_paramstyle] | 
|---|
| 336 | except KeyError: | 
|---|
| 337 | raise NotImplementedError, 'Unsupported paramstyle conversion: %s to %s' % (from_paramstyle, to_paramstyle) | 
|---|
| 338 |  | 
|---|
| 339 | new_query, new_params = convert_function(query, params) | 
|---|
| 340 |  | 
|---|
| 341 | return new_query, new_params | 
|---|
| 342 |  | 
|---|
| 343 | ## | 
|---|
| 344 | ## Unit Tests | 
|---|
| 345 | ## | 
|---|
| 346 | ## Need to move these to the python unit testing framework... | 
|---|
| 347 | ## | 
|---|
| 348 | if __name__ == '__main__': | 
|---|
| 349 | sequence_params = ['a', 'b', 'c', 'd'] | 
|---|
| 350 | dict_params = { | 
|---|
| 351 | 'foo'  : 'a', | 
|---|
| 352 | 'bar'  : 'b', | 
|---|
| 353 | 'baz'  : 'c', | 
|---|
| 354 | 'quux' : 'd', | 
|---|
| 355 | } | 
|---|
| 356 | tests = { | 
|---|
| 357 | 'qmark'    : [ 'SELECT * FROM ? WHERE ? > ? OR ? IS NOT NULL', sequence_params ], | 
|---|
| 358 | 'numeric'  : [ 'SELECT * FROM :1 WHERE :2 > :3 OR :4 IS NOT NULL', sequence_params ], | 
|---|
| 359 | 'named'    : [ 'SELECT * FROM :foo WHERE :bar > :baz OR :quux IS NOT NULL', dict_params ], | 
|---|
| 360 | 'format'   : [ 'SELECT * FROM %s WHERE %s > %s OR %s IS NOT NULL', sequence_params ], | 
|---|
| 361 | 'pyformat' : [ 'SELECT * FROM %(foo)s WHERE %(bar)s > %(baz)s OR %(quux)s IS NOT NULL', dict_params ], | 
|---|
| 362 | } | 
|---|
| 363 | indent = 4 | 
|---|
| 364 | width = 16 | 
|---|
| 365 | print '' | 
|---|
| 366 | print '[ PARAMSTYLE TRANSLATIONS ]' | 
|---|
| 367 | print '' | 
|---|
| 368 | for from_paramstyle in PARAMSTYLES['all']: | 
|---|
| 369 | query  = tests[from_paramstyle][0] | 
|---|
| 370 | params = tests[from_paramstyle][1] | 
|---|
| 371 | print '' | 
|---|
| 372 | print '%s[ %s ]' % (' ' * indent, from_paramstyle.upper()) | 
|---|
| 373 | print '' | 
|---|
| 374 | label = 'query' | 
|---|
| 375 | print '%s%s%s: %s' % (' ' * indent, label, '.' * (width + indent - len(label)), query) | 
|---|
| 376 | label = 'paramstyle' | 
|---|
| 377 | print '%s%s%s: %s' % (' ' * indent, label, '.' * (width + indent - len(label)), from_paramstyle) | 
|---|
| 378 | print '' | 
|---|
| 379 | for to_paramstyle in PARAMSTYLES['all']: | 
|---|
| 380 | converted_query, converted_params = convert(from_paramstyle, to_paramstyle, query, params) | 
|---|
| 381 | label = '%s_query' % (to_paramstyle) | 
|---|
| 382 | print '%s%s%s: %s' % (' ' * indent * 2, label, '.' * (width - len(label)), converted_query) | 
|---|
| 383 | label = '%s_params' % (to_paramstyle) | 
|---|
| 384 | print '%s%s%s: %s' % (' ' * indent * 2, label, '.' * (width - len(label)), converted_params) | 
|---|
| 385 | print '' | 
|---|
| 386 |  | 
|---|