closes #69993: Additional string format checks for logging module

authorSylvain Thénault <sylvain.thenault@logilab.fr>
changeset00766652f2a1
branchdefault
phasepublic
hiddenno
parent revision#2d13a4369245 add note for IDE developpers
child revision#159441fa93cc fix dumb name error in test
files modified by this revision
ChangeLog
checkers/logging.py
checkers/string_format.py
checkers/utils.py
test/unittest_lint.py
# HG changeset patch
# User Sylvain Thénault <sylvain.thenault@logilab.fr>
# Date 1310111844 -7200
# Fri Jul 08 09:57:24 2011 +0200
# Node ID 00766652f2a1163078a347946433d0dfb7347302
# Parent 2d13a436924557726d71a7dba6ea337433f96748
closes #69993: Additional string format checks for logging module

diff --git a/ChangeLog b/ChangeLog
@@ -1,9 +1,14 @@
1  ChangeLog for PyLint
2  ====================
3 
4  	--
5 +	
6 +    * #69993: Additional string format checks for logging module:
7 +      check for missing arguments, too many arguments, or invalid string
8 +      formats in the logging checker module. Contributed by Daniel Arena
9 +	
10      * #69220: add column offset to the reports. If you've a custom reporter,
11        this change may break it has now location gain a new item giving the
12        column offset.
13 
14  2011-01-11  --  0.23.0
diff --git a/checkers/logging.py b/checkers/logging.py
@@ -15,37 +15,48 @@
15  """
16 
17  from logilab import astng
18  from pylint import checkers
19  from pylint import interfaces
20 +from pylint.checkers import utils
21 
22 -EAGER_STRING_INTERPOLATION = 'W6501'
23 +
24 +MSGS = {
25 +    'W6501': ('Specify string format arguments as logging function parameters',
26 +             'Used when a logging statement has a call form of '
27 +             '"logging.<logging method>(format_string % (format_args...))". '
28 +             'Such calls should leave string interpolation to the logging '
29 +             'method itself and be written '
30 +             '"logging.<logging method>(format_string, format_args...)" '
31 +             'so that the program may avoid incurring the cost of the '
32 +             'interpolation in those cases in which no message will be '
33 +             'logged. For more, see '
34 +             'http://www.python.org/dev/peps/pep-0282/.'),
35 +    'E6500': ('Unsupported logging format character %r (%#02x) at index %d',
36 +              'Used when an unsupported format character is used in a logging\
37 +              statement format string.'),
38 +    'E6501': ('Logging format string ends in middle of conversion specifier',
39 +              'Used when a logging statement format string terminates before\
40 +              the end of a conversion specifier.'),
41 +    'E6505': ('Too many arguments for logging format string',
42 +              'Used when a logging format string is given too few arguments.'),
43 +    'E6506': ('Not enough arguments for logging format string',
44 +              'Used when a logging format string is given too many arguments'),
45 +    }
46 +
47 
48  CHECKED_CONVENIENCE_FUNCTIONS = set([
49      'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn',
50      'warning'])
51 
52 
53  class LoggingChecker(checkers.BaseChecker):
54      """Checks use of the logging module."""
55 
56      __implements__ = interfaces.IASTNGChecker
57 -
58      name = 'logging'
59 -
60 -    msgs = {EAGER_STRING_INTERPOLATION:
61 -            ('Specify string format arguments as logging function parameters',
62 -             'Used when a logging statement has a call form of '
63 -             '"logging.<logging method>(format_string % (format_args...))". '
64 -             'Such calls should leave string interpolation to the logging '
65 -             'method itself and be written '
66 -             '"logging.<logging method>(format_string, format_args...)" '
67 -             'so that the program may avoid incurring the cost of the '
68 -             'interpolation in those cases in which no message will be '
69 -             'logged. For more, see '
70 -             'http://www.python.org/dev/peps/pep-0282/.')
71 -            }
72 +    msgs = MSGS
73 
74      def visit_module(self, unused_node):
75          """Clears any state left in this checker from last module checked."""
76          # The code being checked can just as easily "import logging as foo",
77          # so it is necessary to process the imports and store in this field
@@ -65,33 +76,90 @@
78          """Checks calls to (simple forms of) logging methods."""
79          if (not isinstance(node.func, astng.Getattr)
80              or not isinstance(node.func.expr, astng.Name)
81              or node.func.expr.name != self._logging_name):
82              return
83 -        self._CheckConvenienceMethods(node)
84 -        self._CheckLogMethod(node)
85 +        self._check_convenience_methods(node)
86 +        self._check_log_methods(node)
87 
88 -    def _CheckConvenienceMethods(self, node):
89 +    def _check_convenience_methods(self, node):
90          """Checks calls to logging convenience methods (like logging.warn)."""
91          if node.func.attrname not in CHECKED_CONVENIENCE_FUNCTIONS:
92              return
93 -        if not node.args:
94 -            # Either no args, or star args, or double-star args. Beyond the
95 -            # scope of this checker in any case.
96 +        if node.starargs or node.kwargs or not node.args:
97 +            # Either no args, star args, or double-star args. Beyond the
98 +            # scope of this checker.
99              return
100          if isinstance(node.args[0], astng.BinOp) and node.args[0].op == '%':
101 -            self.add_message(EAGER_STRING_INTERPOLATION, node=node)
102 +            self.add_message('W6501', node=node)
103 +        elif isinstance(node.args[0], astng.Const):
104 +            self._check_format_string(node, 0)
105 
106 -    def _CheckLogMethod(self, node):
107 +    def _check_log_methods(self, node):
108          """Checks calls to logging.log(level, format, *format_args)."""
109          if node.func.attrname != 'log':
110              return
111 -        if len(node.args) < 2:
112 -            # Either a malformed call or something with crazy star args or
113 -            # double-star args magic. Beyond the scope of this checker.
114 +        if node.starargs or node.kwargs or len(node.args) < 2:
115 +            # Either a malformed call, star args, or double-star args. Beyond
116 +            # the scope of this checker.
117              return
118          if isinstance(node.args[1], astng.BinOp) and node.args[1].op == '%':
119 -            self.add_message(EAGER_STRING_INTERPOLATION, node=node)
120 +            self.add_message('W6501', node=node)
121 +        elif isinstance(node.args[1], astng.Const):
122 +            self._check_format_string(node, 1)
123 +
124 +    def _check_format_string(self, node, format_arg):
125 +        """Checks that format string tokens match the supplied arguments.
126 +
127 +        Args:
128 +          node: AST node to be checked.
129 +          format_arg: Index of the format string in the node arguments.
130 +        """
131 +        num_args = self._count_supplied_tokens(node.args[format_arg + 1:])
132 +        if not num_args:
133 +            # If no args were supplied, then all format strings are valid -
134 +            # don't check any further.
135 +            return
136 +        format_string = node.args[format_arg].value
137 +        if not isinstance(format_string, basestring):
138 +            # If the log format is constant non-string (e.g. logging.debug(5)),
139 +            # ensure there are no arguments.
140 +            required_num_args = 0
141 +        else:
142 +            try:
143 +                keyword_args, required_num_args = \
144 +                    utils.parse_format_string(format_string)
145 +                if keyword_args:
146 +                    # Keyword checking on logging strings is complicated by
147 +                    # special keywords - out of scope.
148 +                    return
149 +            except utils.UnsupportedFormatCharacter, e:
150 +                c = format_string[e.index]
151 +                self.add_message('E6500', node=node, args=(c, ord(c), e.index))
152 +                return
153 +            except utils.IncompleteFormatString:
154 +                self.add_message('E6501', node=node)
155 +                return
156 +        if num_args > required_num_args:
157 +            self.add_message('E6505', node=node)
158 +        elif num_args < required_num_args:
159 +            self.add_message('E6506', node=node)
160 +
161 +    def _count_supplied_tokens(self, args):
162 +        """Counts the number of tokens in an args list.
163 +
164 +        The Python log functions allow for special keyword arguments: func,
165 +        exc_info and extra. To handle these cases correctly, we only count
166 +        arguments that aren't keywords.
167 +
168 +        Args:
169 +          args: List of AST nodes that are arguments for a log format string.
170 +
171 +        Returns:
172 +          Number of AST nodes that aren't keywords.
173 +        """
174 +        return sum(1 for arg in args if not isinstance(arg, astng.Keyword))
175 +
176 
177  def register(linter):
178      """Required method to auto-register this checker."""
179      linter.register_checker(LoggingChecker(linter))
diff --git a/checkers/string_format.py b/checkers/string_format.py
@@ -20,10 +20,12 @@
180 
181  import string
182  from logilab import astng
183  from pylint.interfaces import IASTNGChecker
184  from pylint.checkers import BaseChecker
185 +from pylint.checkers import utils
186 +
187 
188  MSGS = {
189      'E9900': ("Unsupported format character %r (%#02x) at index %d",
190                "Used when a unsupported format character is used in a format\
191                string."),
@@ -55,86 +57,10 @@
192      'E9906': ("Not enough arguments for format string",
193                "Used when a format string that uses unnamed conversion \
194                specifiers is given too many arguments"),
195      }
196 
197 -class IncompleteFormatString(Exception):
198 -    """A format string ended in the middle of a format specifier."""
199 -    pass
200 -
201 -class UnsupportedFormatCharacter(Exception):
202 -    """A format character in a format string is not one of the supported
203 -    format characters."""
204 -    def __init__(self, index):
205 -        Exception.__init__(self, index)
206 -        self.index = index
207 -
208 -def parse_format_string(format_string):
209 -    """Parses a format string, returning a tuple of (keys, num_args), where keys
210 -    is the set of mapping keys in the format string, and num_args is the number
211 -    of arguments required by the format string.  Raises
212 -    IncompleteFormatString or UnsupportedFormatCharacter if a
213 -    parse error occurs."""
214 -    keys = set()
215 -    num_args = 0
216 -    def next_char(i):
217 -        i += 1
218 -        if i == len(format_string):
219 -            raise IncompleteFormatString
220 -        return (i, format_string[i])
221 -    i = 0
222 -    while i < len(format_string):
223 -        c = format_string[i]
224 -        if c == '%':
225 -            i, c = next_char(i)
226 -            # Parse the mapping key (optional).
227 -            key = None
228 -            if c == '(':
229 -                depth = 1
230 -                i, c = next_char(i)
231 -                key_start = i
232 -                while depth != 0:
233 -                    if c == '(':
234 -                        depth += 1
235 -                    elif c == ')':
236 -                        depth -= 1
237 -                    i, c = next_char(i)
238 -                key_end = i - 1
239 -                key = format_string[key_start:key_end]
240 -
241 -            # Parse the conversion flags (optional).
242 -            while c in '#0- +':
243 -                i, c = next_char(i)
244 -            # Parse the minimum field width (optional).
245 -            if c == '*':
246 -                num_args += 1
247 -                i, c = next_char(i)
248 -            else:
249 -                while c in string.digits:
250 -                    i, c = next_char(i)
251 -            # Parse the precision (optional).
252 -            if c == '.':
253 -                i, c = next_char(i)
254 -                if c == '*':
255 -                    num_args += 1
256 -                    i, c = next_char(i)
257 -                else:
258 -                    while c in string.digits:
259 -                        i, c = next_char(i)
260 -            # Parse the length modifier (optional).
261 -            if c in 'hlL':
262 -                i, c = next_char(i)
263 -            # Parse the conversion type (mandatory).
264 -            if c not in 'diouxXeEfFgGcrs%':
265 -                raise UnsupportedFormatCharacter(i)
266 -            if key:
267 -                keys.add(key)
268 -            elif c != '%':
269 -                num_args += 1
270 -        i += 1
271 -    return keys, num_args
272 -
273  OTHER_NODES = (astng.Const, astng.List, astng.Backquote,
274                 astng.Lambda, astng.Function,
275                 astng.ListComp, astng.SetComp, astng.GenExpr)
276 
277  class StringFormatChecker(BaseChecker):
@@ -156,16 +82,16 @@
278              and isinstance(left.value, basestring)):
279              return
280          format_string = left.value
281          try:
282              required_keys, required_num_args = \
283 -                parse_format_string(format_string)
284 -        except UnsupportedFormatCharacter, e:
285 +                utils.parse_format_string(format_string)
286 +        except utils.UnsupportedFormatCharacter, e:
287              c = format_string[e.index]
288              self.add_message('E9900', node=node, args=(c, ord(c), e.index))
289              return
290 -        except IncompleteFormatString:
291 +        except utils.IncompleteFormatString:
292              self.add_message('E9901', node=node)
293              return
294          if required_keys and required_num_args:
295              # The format string uses both named and unnamed format
296              # specifiers.
diff --git a/checkers/utils.py b/checkers/utils.py
@@ -16,10 +16,11 @@
297  # this program; if not, write to the Free Software Foundation, Inc.,
298  # 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
299  """some functions that may be useful for various checkers
300  """
301 
302 +import string
303  from logilab import astng
304  from logilab.common.compat import builtins
305  BUILTINS_NAME = builtins.__name__
306 
307  COMP_NODE_TYPES = astng.ListComp, astng.SetComp, astng.DictComp, astng.GenExpr
@@ -209,5 +210,80 @@
308      def store_messages(func):
309          func.checks_msgs = messages
310          return func
311      return store_messages
312 
313 +class IncompleteFormatString(Exception):
314 +    """A format string ended in the middle of a format specifier."""
315 +    pass
316 +
317 +class UnsupportedFormatCharacter(Exception):
318 +    """A format character in a format string is not one of the supported
319 +    format characters."""
320 +    def __init__(self, index):
321 +        Exception.__init__(self, index)
322 +        self.index = index
323 +
324 +def parse_format_string(format_string):
325 +    """Parses a format string, returning a tuple of (keys, num_args), where keys
326 +    is the set of mapping keys in the format string, and num_args is the number
327 +    of arguments required by the format string.  Raises
328 +    IncompleteFormatString or UnsupportedFormatCharacter if a
329 +    parse error occurs."""
330 +    keys = set()
331 +    num_args = 0
332 +    def next_char(i):
333 +        i += 1
334 +        if i == len(format_string):
335 +            raise IncompleteFormatString
336 +        return (i, format_string[i])
337 +    i = 0
338 +    while i < len(format_string):
339 +        c = format_string[i]
340 +        if c == '%':
341 +            i, c = next_char(i)
342 +            # Parse the mapping key (optional).
343 +            key = None
344 +            if c == '(':
345 +                depth = 1
346 +                i, c = next_char(i)
347 +                key_start = i
348 +                while depth != 0:
349 +                    if c == '(':
350 +                        depth += 1
351 +                    elif c == ')':
352 +                        depth -= 1
353 +                    i, c = next_char(i)
354 +                key_end = i - 1
355 +                key = format_string[key_start:key_end]
356 +
357 +            # Parse the conversion flags (optional).
358 +            while c in '#0- +':
359 +                i, c = next_char(i)
360 +            # Parse the minimum field width (optional).
361 +            if c == '*':
362 +                num_args += 1
363 +                i, c = next_char(i)
364 +            else:
365 +                while c in string.digits:
366 +                    i, c = next_char(i)
367 +            # Parse the precision (optional).
368 +            if c == '.':
369 +                i, c = next_char(i)
370 +                if c == '*':
371 +                    num_args += 1
372 +                    i, c = next_char(i)
373 +                else:
374 +                    while c in string.digits:
375 +                        i, c = next_char(i)
376 +            # Parse the length modifier (optional).
377 +            if c in 'hlL':
378 +                i, c = next_char(i)
379 +            # Parse the conversion type (mandatory).
380 +            if c not in 'diouxXeEfFgGcrs%':
381 +                raise UnsupportedFormatCharacter(i)
382 +            if key:
383 +                keys.add(key)
384 +            elif c != '%':
385 +                num_args += 1
386 +        i += 1
387 +    return keys, num_args
diff --git a/test/unittest_lint.py b/test/unittest_lint.py
@@ -231,11 +231,11 @@
388      def test_errors_only(self):
389          linter = self.linter
390          self.linter.error_mode()
391          checkers = self.linter.prepare_checkers()
392          checker_names = tuple(c.name for c in checkers)
393 -        should_not = ('design', 'format', 'imports', 'logging', 'metrics',
394 +        should_not = ('design', 'format', 'imports', 'metrics',
395                        'miscellaneous', 'similarities')
396          self.failIf(any(name in checker_names for name in should_not))
397 
398      def test_disable_similar(self):
399          # XXX we have to disable them both, that's no good
@@ -247,11 +247,11 @@
400          """check that we disabled a lot of checkers"""
401          self.linter.set_option('reports', False)
402          # FIXME should it be necessary to explicitly desactivate failures ?
403          self.linter.set_option('disable', 'R,C,W')
404          checker_names = [c.name for c in self.linter.prepare_checkers()]
405 -        should_not = ('design', 'logging', 'metrics', 'similarities')
406 +        should_not = ('design', 'metrics', 'similarities')
407          rest = [name for name in checker_names if name in should_not]
408          self.assertListEqual(rest, [])
409          self.linter.set_option('disable', 'R,C,W,F')
410          checker_names = [c.name for c in self.linter.prepare_checkers()]
411          should_not +=  ('format', 'imports')