Patch against 745 implement logging checkers (69993_patch.txt)

Patch against 745 to implement additional logging checkers for bad format strings.

download

diff -r 71243b772f2b checkers/logging.py
--- a/checkers/logging.py Thu Jun 16 19:29:00 2011 +0200
+++ b/checkers/logging.py Tue Jun 28 18:21:34 2011 +1000
@@ -17,23 +17,11 @@
from logilab import astng
from pylint import checkers
from pylint import interfaces
+from pylint.checkers import utils

-EAGER_STRING_INTERPOLATION = 'W6501'

-CHECKED_CONVENIENCE_FUNCTIONS = set([
- 'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn',
- 'warning'])
-
-
-class LoggingChecker(checkers.BaseChecker):
- """Checks use of the logging module."""
-
- __implements__ = interfaces.IASTNGChecker
-
- name = 'logging'
-
- msgs = {EAGER_STRING_INTERPOLATION:
- ('Specify string format arguments as logging function parameters',
+MSGS = {
+ 'W6501': ('Specify string format arguments as logging function parameters',
'Used when a logging statement has a call form of '
'"logging.<logging method>(format_string % (format_args...))". '
'Such calls should leave string interpolation to the logging '
@@ -42,8 +30,31 @@
'so that the program may avoid incurring the cost of the '
'interpolation in those cases in which no message will be '
'logged. For more, see '
- 'http://www.python.org/dev/peps/pep-0282/.')
- }
+ 'http://www.python.org/dev/peps/pep-0282/.'),
+ 'E6500': ('Unsupported logging format character %r (%#02x) at index %d',
+ 'Used when an unsupported format character is used in a logging\
+ statement format string.'),
+ 'E6501': ('Logging format string ends in middle of conversion specifier',
+ 'Used when a logging statement format string terminates before\
+ the end of a conversion specifier.'),
+ 'E6505': ('Too many arguments for logging format string',
+ 'Used when a logging format string is given too few arguments.'),
+ 'E6506': ('Not enough arguments for logging format string',
+ 'Used when a logging format string is given too many arguments'),
+ }
+
+
+CHECKED_CONVENIENCE_FUNCTIONS = set([
+ 'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn',
+ 'warning'])
+
+
+class LoggingChecker(checkers.BaseChecker):
+ """Checks use of the logging module."""
+
+ __implements__ = interfaces.IASTNGChecker
+ name = 'logging'
+ msgs = MSGS

def visit_module(self, unused_node):
"""Clears any state left in this checker from last module checked."""
@@ -67,30 +78,87 @@
or not isinstance(node.func.expr, astng.Name)
or node.func.expr.name != self._logging_name):
return
- self._CheckConvenienceMethods(node)
- self._CheckLogMethod(node)
+ self._check_convenience_methods(node)
+ self._check_log_methods(node)

- def _CheckConvenienceMethods(self, node):
+ def _check_convenience_methods(self, node):
"""Checks calls to logging convenience methods (like logging.warn)."""
if node.func.attrname not in CHECKED_CONVENIENCE_FUNCTIONS:
return
- if not node.args:
- # Either no args, or star args, or double-star args. Beyond the
- # scope of this checker in any case.
+ if node.starargs or node.kwargs or not node.args:
+ # Either no args, star args, or double-star args. Beyond the
+ # scope of this checker.
return
if isinstance(node.args[0], astng.BinOp) and node.args[0].op == '%':
- self.add_message(EAGER_STRING_INTERPOLATION, node=node)
+ self.add_message('W6501', node=node)
+ elif isinstance(node.args[0], astng.Const):
+ self._check_format_string(node, 0)

- def _CheckLogMethod(self, node):
+ def _check_log_methods(self, node):
"""Checks calls to logging.log(level, format, *format_args)."""
if node.func.attrname != 'log':
return
- if len(node.args) < 2:
- # Either a malformed call or something with crazy star args or
- # double-star args magic. Beyond the scope of this checker.
+ if node.starargs or node.kwargs or len(node.args) < 2:
+ # Either a malformed call, star args, or double-star args. Beyond
+ # the scope of this checker.
return
if isinstance(node.args[1], astng.BinOp) and node.args[1].op == '%':
- self.add_message(EAGER_STRING_INTERPOLATION, node=node)
+ self.add_message('W6501', node=node)
+ elif isinstance(node.args[1], astng.Const):
+ self._check_format_string(node, 1)
+
+ def _check_format_string(self, node, format_arg):
+ """Checks that format string tokens match the supplied arguments.
+
+ Args:
+ node: AST node to be checked.
+ format_arg: Index of the format string in the node arguments.
+ """
+ num_args = self._count_supplied_tokens(node.args[format_arg + 1:])
+ if not num_args:
+ # If no args were supplied, then all format strings are valid -
+ # don't check any further.
+ return
+ format_string = node.args[format_arg].value
+ if not isinstance(format_string, basestring):
+ # If the log format is constant non-string (e.g. logging.debug(5)),
+ # ensure there are no arguments.
+ required_num_args = 0
+ else:
+ try:
+ keyword_args, required_num_args = \
+ utils.parse_format_string(format_string)
+ if keyword_args:
+ # Keyword checking on logging strings is complicated by
+ # special keywords - out of scope.
+ return
+ except utils.UnsupportedFormatCharacter as e:
+ c = format_string[e.index]
+ self.add_message('E6500', node=node, args=(c, ord(c), e.index))
+ return
+ except utils.IncompleteFormatString:
+ self.add_message('E6501', node=node)
+ return
+ if num_args > required_num_args:
+ self.add_message('E6505', node=node)
+ elif num_args < required_num_args:
+ self.add_message('E6506', node=node)
+
+ def _count_supplied_tokens(self, args):
+ """Counts the number of tokens in an args list.
+
+ The Python log functions allow for special keyword arguments: func,
+ exc_info and extra. To handle these cases correctly, we only count
+ arguments that aren't keywords.
+
+ Args:
+ args: List of AST nodes that are arguments for a log format string.
+
+ Returns:
+ Number of AST nodes that aren't keywords.
+ """
+ return sum(1 for arg in args if not isinstance(arg, astng.Keyword))
+

def register(linter):
"""Required method to auto-register this checker."""
diff -r 71243b772f2b checkers/string_format.py
--- a/checkers/string_format.py Thu Jun 16 19:29:00 2011 +0200
+++ b/checkers/string_format.py Tue Jun 28 18:21:34 2011 +1000
@@ -22,6 +22,8 @@
from logilab import astng
from pylint.interfaces import IASTNGChecker
from pylint.checkers import BaseChecker
+from pylint.checkers import utils
+

MSGS = {
'E9900': ("Unsupported format character %r (%#02x) at index %d",
@@ -57,82 +59,6 @@
specifiers is given too many arguments"),
}

-class IncompleteFormatString(Exception):
- """A format string ended in the middle of a format specifier."""
- pass
-
-class UnsupportedFormatCharacter(Exception):
- """A format character in a format string is not one of the supported
- format characters."""
- def __init__(self, index):
- Exception.__init__(self, index)
- self.index = index
-
-def parse_format_string(format_string):
- """Parses a format string, returning a tuple of (keys, num_args), where keys
- is the set of mapping keys in the format string, and num_args is the number
- of arguments required by the format string. Raises
- IncompleteFormatString or UnsupportedFormatCharacter if a
- parse error occurs."""
- keys = set()
- num_args = 0
- def next_char(i):
- i += 1
- if i == len(format_string):
- raise IncompleteFormatString
- return (i, format_string[i])
- i = 0
- while i < len(format_string):
- c = format_string[i]
- if c == '%':
- i, c = next_char(i)
- # Parse the mapping key (optional).
- key = None
- if c == '(':
- depth = 1
- i, c = next_char(i)
- key_start = i
- while depth != 0:
- if c == '(':
- depth += 1
- elif c == ')':
- depth -= 1
- i, c = next_char(i)
- key_end = i - 1
- key = format_string[key_start:key_end]
-
- # Parse the conversion flags (optional).
- while c in '#0- +':
- i, c = next_char(i)
- # Parse the minimum field width (optional).
- if c == '*':
- num_args += 1
- i, c = next_char(i)
- else:
- while c in string.digits:
- i, c = next_char(i)
- # Parse the precision (optional).
- if c == '.':
- i, c = next_char(i)
- if c == '*':
- num_args += 1
- i, c = next_char(i)
- else:
- while c in string.digits:
- i, c = next_char(i)
- # Parse the length modifier (optional).
- if c in 'hlL':
- i, c = next_char(i)
- # Parse the conversion type (mandatory).
- if c not in 'diouxXeEfFgGcrs%':
- raise UnsupportedFormatCharacter(i)
- if key:
- keys.add(key)
- elif c != '%':
- num_args += 1
- i += 1
- return keys, num_args
-
OTHER_NODES = (astng.Const, astng.List, astng.Backquote,
astng.Lambda, astng.Function,
astng.ListComp, astng.SetComp, astng.GenExpr)
@@ -158,12 +84,12 @@
format_string = left.value
try:
required_keys, required_num_args = \
- parse_format_string(format_string)
- except UnsupportedFormatCharacter, e:
+ utils.parse_format_string(format_string)
+ except utils.UnsupportedFormatCharacter as e:
c = format_string[e.index]
self.add_message('E9900', node=node, args=(c, ord(c), e.index))
return
- except IncompleteFormatString:
+ except utils.IncompleteFormatString:
self.add_message('E9901', node=node)
return
if required_keys and required_num_args:
diff -r 71243b772f2b checkers/utils.py
--- a/checkers/utils.py Thu Jun 16 19:29:00 2011 +0200
+++ b/checkers/utils.py Tue Jun 28 18:21:34 2011 +1000
@@ -18,6 +18,7 @@
"""some functions that may be useful for various checkers
"""

+import string
from logilab import astng
from logilab.common.compat import builtins
BUILTINS_NAME = builtins.__name__
@@ -211,3 +212,78 @@
return func
return store_messages

+class IncompleteFormatString(Exception):
+ """A format string ended in the middle of a format specifier."""
+ pass
+
+class UnsupportedFormatCharacter(Exception):
+ """A format character in a format string is not one of the supported
+ format characters."""
+ def __init__(self, index):
+ Exception.__init__(self, index)
+ self.index = index
+
+def parse_format_string(format_string):
+ """Parses a format string, returning a tuple of (keys, num_args), where keys
+ is the set of mapping keys in the format string, and num_args is the number
+ of arguments required by the format string. Raises
+ IncompleteFormatString or UnsupportedFormatCharacter if a
+ parse error occurs."""
+ keys = set()
+ num_args = 0
+ def next_char(i):
+ i += 1
+ if i == len(format_string):
+ raise IncompleteFormatString
+ return (i, format_string[i])
+ i = 0
+ while i < len(format_string):
+ c = format_string[i]
+ if c == '%':
+ i, c = next_char(i)
+ # Parse the mapping key (optional).
+ key = None
+ if c == '(':
+ depth = 1
+ i, c = next_char(i)
+ key_start = i
+ while depth != 0:
+ if c == '(':
+ depth += 1
+ elif c == ')':
+ depth -= 1
+ i, c = next_char(i)
+ key_end = i - 1
+ key = format_string[key_start:key_end]
+
+ # Parse the conversion flags (optional).
+ while c in '#0- +':
+ i, c = next_char(i)
+ # Parse the minimum field width (optional).
+ if c == '*':
+ num_args += 1
+ i, c = next_char(i)
+ else:
+ while c in string.digits:
+ i, c = next_char(i)
+ # Parse the precision (optional).
+ if c == '.':
+ i, c = next_char(i)
+ if c == '*':
+ num_args += 1
+ i, c = next_char(i)
+ else:
+ while c in string.digits:
+ i, c = next_char(i)
+ # Parse the length modifier (optional).
+ if c in 'hlL':
+ i, c = next_char(i)
+ # Parse the conversion type (mandatory).
+ if c not in 'diouxXeEfFgGcrs%':
+ raise UnsupportedFormatCharacter(i)
+ if key:
+ keys.add(key)
+ elif c != '%':
+ num_args += 1
+ i += 1
+ return keys, num_args
diff -r 71243b772f2b test/input/func_e65xx.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/input/func_e65xx.py Tue Jun 28 18:21:34 2011 +1000
@@ -0,0 +1,27 @@
+# pylint: disable=E1101
+"""Test checking of log format strings
+"""
+
+__revision__ = ''
+
+import logging
+
+def pprint():
+ """Test string format in logging statements.
+ """
+ # These should all emit lint errors:
+ logging.info(0, '') # 6505
+ logging.info('', '') # 6505
+ logging.info('%s%', '') # 6501
+ logging.info('%s%s', '') # 6506
+ logging.info('%s%a', '', '') # 6500
+ logging.info('%s%s', '', '', '') # 6505
+
+ # These should be okay:
+ logging.info(1)
+ logging.info(True)
+ logging.info('')
+ logging.info('%s%')
+ logging.info('%s', '')
+ logging.info('%s%%', '')
+ logging.info('%s%s', '', '')
diff -r 71243b772f2b test/messages/func_e65xx.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/messages/func_e65xx.txt Tue Jun 28 18:21:34 2011 +1000
@@ -0,0 +1,6 @@
+E: 13:pprint: Too many arguments for logging format string
+E: 14:pprint: Too many arguments for logging format string
+E: 15:pprint: Logging format string ends in middle of conversion specifier
+E: 16:pprint: Not enough arguments for logging format string
+E: 17:pprint: Unsupported logging format character 'a' (0x61) at index 3
+E: 18:pprint: Too many arguments for logging format string
diff -r 71243b772f2b test/unittest_lint.py
--- a/test/unittest_lint.py Thu Jun 16 19:29:00 2011 +0200
+++ b/test/unittest_lint.py Tue Jun 28 18:21:34 2011 +1000
@@ -233,7 +233,7 @@
self.linter.error_mode()
checkers = self.linter.prepare_checkers()
checker_names = tuple(c.name for c in checkers)
- should_not = ('design', 'format', 'imports', 'logging', 'metrics',
+ should_not = ('design', 'format', 'imports', 'metrics',
'miscellaneous', 'similarities')
self.failIf(any(name in checker_names for name in should_not))

@@ -249,7 +249,7 @@
# FIXME should it be necessary to explicitly desactivate failures ?
self.linter.set_option('disable', 'R,C,W')
checker_names = [c.name for c in self.linter.prepare_checkers()]
- should_not = ('design', 'logging', 'metrics', 'similarities')
+ should_not = ('design', 'metrics', 'similarities')
rest = [name for name in checker_names if name in should_not]
self.assertListEqual(rest, [])
self.linter.set_option('disable', 'R,C,W,F')