# HG changeset patch
# User Sylvain Thénault <sylvain.thenault@logilab.fr>
# Date 1310111844 -7200
# Fri Jul 08 09:57:24 2011 +0200
# Node ID 00766652f2a1163078a347946433d0dfb7347302
# Parent 2d13a436924557726d71a7dba6ea337433f96748
closes #69993: Additional string format checks for logging module
# User Sylvain Thénault <sylvain.thenault@logilab.fr>
# Date 1310111844 -7200
# Fri Jul 08 09:57:24 2011 +0200
# Node ID 00766652f2a1163078a347946433d0dfb7347302
# Parent 2d13a436924557726d71a7dba6ea337433f96748
closes #69993: Additional string format checks for logging module
@@ -1,9 +1,14 @@
1 ChangeLog for PyLint 2 ==================== 3 4 -- 5 + 6 + * #69993: Additional string format checks for logging module: 7 + check for missing arguments, too many arguments, or invalid string 8 + formats in the logging checker module. Contributed by Daniel Arena 9 + 10 * #69220: add column offset to the reports. If you've a custom reporter, 11 this change may break it has now location gain a new item giving the 12 column offset. 13 14 2011-01-11 -- 0.23.0
@@ -15,37 +15,48 @@
15 """ 16 17 from logilab import astng 18 from pylint import checkers 19 from pylint import interfaces 20 +from pylint.checkers import utils 21 22 -EAGER_STRING_INTERPOLATION = 'W6501' 23 + 24 +MSGS = { 25 + 'W6501': ('Specify string format arguments as logging function parameters', 26 + 'Used when a logging statement has a call form of ' 27 + '"logging.<logging method>(format_string % (format_args...))". ' 28 + 'Such calls should leave string interpolation to the logging ' 29 + 'method itself and be written ' 30 + '"logging.<logging method>(format_string, format_args...)" ' 31 + 'so that the program may avoid incurring the cost of the ' 32 + 'interpolation in those cases in which no message will be ' 33 + 'logged. For more, see ' 34 + 'http://www.python.org/dev/peps/pep-0282/.'), 35 + 'E6500': ('Unsupported logging format character %r (%#02x) at index %d', 36 + 'Used when an unsupported format character is used in a logging\ 37 + statement format string.'), 38 + 'E6501': ('Logging format string ends in middle of conversion specifier', 39 + 'Used when a logging statement format string terminates before\ 40 + the end of a conversion specifier.'), 41 + 'E6505': ('Too many arguments for logging format string', 42 + 'Used when a logging format string is given too few arguments.'), 43 + 'E6506': ('Not enough arguments for logging format string', 44 + 'Used when a logging format string is given too many arguments'), 45 + } 46 + 47 48 CHECKED_CONVENIENCE_FUNCTIONS = set([ 49 'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn', 50 'warning']) 51 52 53 class LoggingChecker(checkers.BaseChecker): 54 """Checks use of the logging module.""" 55 56 __implements__ = interfaces.IASTNGChecker 57 - 58 name = 'logging' 59 - 60 - msgs = {EAGER_STRING_INTERPOLATION: 61 - ('Specify string format arguments as logging function parameters', 62 - 'Used when a logging statement has a call form of ' 63 - '"logging.<logging method>(format_string % (format_args...))". ' 64 - 'Such calls should leave string interpolation to the logging ' 65 - 'method itself and be written ' 66 - '"logging.<logging method>(format_string, format_args...)" ' 67 - 'so that the program may avoid incurring the cost of the ' 68 - 'interpolation in those cases in which no message will be ' 69 - 'logged. For more, see ' 70 - 'http://www.python.org/dev/peps/pep-0282/.') 71 - } 72 + msgs = MSGS 73 74 def visit_module(self, unused_node): 75 """Clears any state left in this checker from last module checked.""" 76 # The code being checked can just as easily "import logging as foo", 77 # so it is necessary to process the imports and store in this field
@@ -65,33 +76,90 @@
78 """Checks calls to (simple forms of) logging methods.""" 79 if (not isinstance(node.func, astng.Getattr) 80 or not isinstance(node.func.expr, astng.Name) 81 or node.func.expr.name != self._logging_name): 82 return 83 - self._CheckConvenienceMethods(node) 84 - self._CheckLogMethod(node) 85 + self._check_convenience_methods(node) 86 + self._check_log_methods(node) 87 88 - def _CheckConvenienceMethods(self, node): 89 + def _check_convenience_methods(self, node): 90 """Checks calls to logging convenience methods (like logging.warn).""" 91 if node.func.attrname not in CHECKED_CONVENIENCE_FUNCTIONS: 92 return 93 - if not node.args: 94 - # Either no args, or star args, or double-star args. Beyond the 95 - # scope of this checker in any case. 96 + if node.starargs or node.kwargs or not node.args: 97 + # Either no args, star args, or double-star args. Beyond the 98 + # scope of this checker. 99 return 100 if isinstance(node.args[0], astng.BinOp) and node.args[0].op == '%': 101 - self.add_message(EAGER_STRING_INTERPOLATION, node=node) 102 + self.add_message('W6501', node=node) 103 + elif isinstance(node.args[0], astng.Const): 104 + self._check_format_string(node, 0) 105 106 - def _CheckLogMethod(self, node): 107 + def _check_log_methods(self, node): 108 """Checks calls to logging.log(level, format, *format_args).""" 109 if node.func.attrname != 'log': 110 return 111 - if len(node.args) < 2: 112 - # Either a malformed call or something with crazy star args or 113 - # double-star args magic. Beyond the scope of this checker. 114 + if node.starargs or node.kwargs or len(node.args) < 2: 115 + # Either a malformed call, star args, or double-star args. Beyond 116 + # the scope of this checker. 117 return 118 if isinstance(node.args[1], astng.BinOp) and node.args[1].op == '%': 119 - self.add_message(EAGER_STRING_INTERPOLATION, node=node) 120 + self.add_message('W6501', node=node) 121 + elif isinstance(node.args[1], astng.Const): 122 + self._check_format_string(node, 1) 123 + 124 + def _check_format_string(self, node, format_arg): 125 + """Checks that format string tokens match the supplied arguments. 126 + 127 + Args: 128 + node: AST node to be checked. 129 + format_arg: Index of the format string in the node arguments. 130 + """ 131 + num_args = self._count_supplied_tokens(node.args[format_arg + 1:]) 132 + if not num_args: 133 + # If no args were supplied, then all format strings are valid - 134 + # don't check any further. 135 + return 136 + format_string = node.args[format_arg].value 137 + if not isinstance(format_string, basestring): 138 + # If the log format is constant non-string (e.g. logging.debug(5)), 139 + # ensure there are no arguments. 140 + required_num_args = 0 141 + else: 142 + try: 143 + keyword_args, required_num_args = \ 144 + utils.parse_format_string(format_string) 145 + if keyword_args: 146 + # Keyword checking on logging strings is complicated by 147 + # special keywords - out of scope. 148 + return 149 + except utils.UnsupportedFormatCharacter, e: 150 + c = format_string[e.index] 151 + self.add_message('E6500', node=node, args=(c, ord(c), e.index)) 152 + return 153 + except utils.IncompleteFormatString: 154 + self.add_message('E6501', node=node) 155 + return 156 + if num_args > required_num_args: 157 + self.add_message('E6505', node=node) 158 + elif num_args < required_num_args: 159 + self.add_message('E6506', node=node) 160 + 161 + def _count_supplied_tokens(self, args): 162 + """Counts the number of tokens in an args list. 163 + 164 + The Python log functions allow for special keyword arguments: func, 165 + exc_info and extra. To handle these cases correctly, we only count 166 + arguments that aren't keywords. 167 + 168 + Args: 169 + args: List of AST nodes that are arguments for a log format string. 170 + 171 + Returns: 172 + Number of AST nodes that aren't keywords. 173 + """ 174 + return sum(1 for arg in args if not isinstance(arg, astng.Keyword)) 175 + 176 177 def register(linter): 178 """Required method to auto-register this checker.""" 179 linter.register_checker(LoggingChecker(linter))
@@ -20,10 +20,12 @@
180 181 import string 182 from logilab import astng 183 from pylint.interfaces import IASTNGChecker 184 from pylint.checkers import BaseChecker 185 +from pylint.checkers import utils 186 + 187 188 MSGS = { 189 'E9900': ("Unsupported format character %r (%#02x) at index %d", 190 "Used when a unsupported format character is used in a format\ 191 string."),
@@ -55,86 +57,10 @@
192 'E9906': ("Not enough arguments for format string", 193 "Used when a format string that uses unnamed conversion \ 194 specifiers is given too many arguments"), 195 } 196 197 -class IncompleteFormatString(Exception): 198 - """A format string ended in the middle of a format specifier.""" 199 - pass 200 - 201 -class UnsupportedFormatCharacter(Exception): 202 - """A format character in a format string is not one of the supported 203 - format characters.""" 204 - def __init__(self, index): 205 - Exception.__init__(self, index) 206 - self.index = index 207 - 208 -def parse_format_string(format_string): 209 - """Parses a format string, returning a tuple of (keys, num_args), where keys 210 - is the set of mapping keys in the format string, and num_args is the number 211 - of arguments required by the format string. Raises 212 - IncompleteFormatString or UnsupportedFormatCharacter if a 213 - parse error occurs.""" 214 - keys = set() 215 - num_args = 0 216 - def next_char(i): 217 - i += 1 218 - if i == len(format_string): 219 - raise IncompleteFormatString 220 - return (i, format_string[i]) 221 - i = 0 222 - while i < len(format_string): 223 - c = format_string[i] 224 - if c == '%': 225 - i, c = next_char(i) 226 - # Parse the mapping key (optional). 227 - key = None 228 - if c == '(': 229 - depth = 1 230 - i, c = next_char(i) 231 - key_start = i 232 - while depth != 0: 233 - if c == '(': 234 - depth += 1 235 - elif c == ')': 236 - depth -= 1 237 - i, c = next_char(i) 238 - key_end = i - 1 239 - key = format_string[key_start:key_end] 240 - 241 - # Parse the conversion flags (optional). 242 - while c in '#0- +': 243 - i, c = next_char(i) 244 - # Parse the minimum field width (optional). 245 - if c == '*': 246 - num_args += 1 247 - i, c = next_char(i) 248 - else: 249 - while c in string.digits: 250 - i, c = next_char(i) 251 - # Parse the precision (optional). 252 - if c == '.': 253 - i, c = next_char(i) 254 - if c == '*': 255 - num_args += 1 256 - i, c = next_char(i) 257 - else: 258 - while c in string.digits: 259 - i, c = next_char(i) 260 - # Parse the length modifier (optional). 261 - if c in 'hlL': 262 - i, c = next_char(i) 263 - # Parse the conversion type (mandatory). 264 - if c not in 'diouxXeEfFgGcrs%': 265 - raise UnsupportedFormatCharacter(i) 266 - if key: 267 - keys.add(key) 268 - elif c != '%': 269 - num_args += 1 270 - i += 1 271 - return keys, num_args 272 - 273 OTHER_NODES = (astng.Const, astng.List, astng.Backquote, 274 astng.Lambda, astng.Function, 275 astng.ListComp, astng.SetComp, astng.GenExpr) 276 277 class StringFormatChecker(BaseChecker):
@@ -156,16 +82,16 @@
278 and isinstance(left.value, basestring)): 279 return 280 format_string = left.value 281 try: 282 required_keys, required_num_args = \ 283 - parse_format_string(format_string) 284 - except UnsupportedFormatCharacter, e: 285 + utils.parse_format_string(format_string) 286 + except utils.UnsupportedFormatCharacter, e: 287 c = format_string[e.index] 288 self.add_message('E9900', node=node, args=(c, ord(c), e.index)) 289 return 290 - except IncompleteFormatString: 291 + except utils.IncompleteFormatString: 292 self.add_message('E9901', node=node) 293 return 294 if required_keys and required_num_args: 295 # The format string uses both named and unnamed format 296 # specifiers.
@@ -16,10 +16,11 @@
297 # this program; if not, write to the Free Software Foundation, Inc., 298 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 299 """some functions that may be useful for various checkers 300 """ 301 302 +import string 303 from logilab import astng 304 from logilab.common.compat import builtins 305 BUILTINS_NAME = builtins.__name__ 306 307 COMP_NODE_TYPES = astng.ListComp, astng.SetComp, astng.DictComp, astng.GenExpr
@@ -209,5 +210,80 @@
308 def store_messages(func): 309 func.checks_msgs = messages 310 return func 311 return store_messages 312 313 +class IncompleteFormatString(Exception): 314 + """A format string ended in the middle of a format specifier.""" 315 + pass 316 + 317 +class UnsupportedFormatCharacter(Exception): 318 + """A format character in a format string is not one of the supported 319 + format characters.""" 320 + def __init__(self, index): 321 + Exception.__init__(self, index) 322 + self.index = index 323 + 324 +def parse_format_string(format_string): 325 + """Parses a format string, returning a tuple of (keys, num_args), where keys 326 + is the set of mapping keys in the format string, and num_args is the number 327 + of arguments required by the format string. Raises 328 + IncompleteFormatString or UnsupportedFormatCharacter if a 329 + parse error occurs.""" 330 + keys = set() 331 + num_args = 0 332 + def next_char(i): 333 + i += 1 334 + if i == len(format_string): 335 + raise IncompleteFormatString 336 + return (i, format_string[i]) 337 + i = 0 338 + while i < len(format_string): 339 + c = format_string[i] 340 + if c == '%': 341 + i, c = next_char(i) 342 + # Parse the mapping key (optional). 343 + key = None 344 + if c == '(': 345 + depth = 1 346 + i, c = next_char(i) 347 + key_start = i 348 + while depth != 0: 349 + if c == '(': 350 + depth += 1 351 + elif c == ')': 352 + depth -= 1 353 + i, c = next_char(i) 354 + key_end = i - 1 355 + key = format_string[key_start:key_end] 356 + 357 + # Parse the conversion flags (optional). 358 + while c in '#0- +': 359 + i, c = next_char(i) 360 + # Parse the minimum field width (optional). 361 + if c == '*': 362 + num_args += 1 363 + i, c = next_char(i) 364 + else: 365 + while c in string.digits: 366 + i, c = next_char(i) 367 + # Parse the precision (optional). 368 + if c == '.': 369 + i, c = next_char(i) 370 + if c == '*': 371 + num_args += 1 372 + i, c = next_char(i) 373 + else: 374 + while c in string.digits: 375 + i, c = next_char(i) 376 + # Parse the length modifier (optional). 377 + if c in 'hlL': 378 + i, c = next_char(i) 379 + # Parse the conversion type (mandatory). 380 + if c not in 'diouxXeEfFgGcrs%': 381 + raise UnsupportedFormatCharacter(i) 382 + if key: 383 + keys.add(key) 384 + elif c != '%': 385 + num_args += 1 386 + i += 1 387 + return keys, num_args
@@ -231,11 +231,11 @@
388 def test_errors_only(self): 389 linter = self.linter 390 self.linter.error_mode() 391 checkers = self.linter.prepare_checkers() 392 checker_names = tuple(c.name for c in checkers) 393 - should_not = ('design', 'format', 'imports', 'logging', 'metrics', 394 + should_not = ('design', 'format', 'imports', 'metrics', 395 'miscellaneous', 'similarities') 396 self.failIf(any(name in checker_names for name in should_not)) 397 398 def test_disable_similar(self): 399 # XXX we have to disable them both, that's no good
@@ -247,11 +247,11 @@
400 """check that we disabled a lot of checkers""" 401 self.linter.set_option('reports', False) 402 # FIXME should it be necessary to explicitly desactivate failures ? 403 self.linter.set_option('disable', 'R,C,W') 404 checker_names = [c.name for c in self.linter.prepare_checkers()] 405 - should_not = ('design', 'logging', 'metrics', 'similarities') 406 + should_not = ('design', 'metrics', 'similarities') 407 rest = [name for name in checker_names if name in should_not] 408 self.assertListEqual(rest, []) 409 self.linter.set_option('disable', 'R,C,W,F') 410 checker_names = [c.name for c in self.linter.prepare_checkers()] 411 should_not += ('format', 'imports')