Add --ignore-imports option to similarity checking. Closes #106534.

Additionally:
  • add access to existing --ignore-docstrings option to symilar command line
  • add access to new --ignore-imports option to symilar command line
  • add test for existing --ignore-docstrings feature
  • add test for new --ignore-imports feature
author: Ry4an Brase <ry4an-hg@ry4an.org>
changeset: 19502af3b7c9
branch: default
phase: public
hidden: no
parent revision: #7a1e32ae0c60 don't want to run a checker only because of a Fatal error
child revision: #8f8420148961 explicit trailing whitespace, avoid temptation to drop those (thx Martin)
files modified by this revision
ChangeLog
checkers/similar.py
test/input/similar1
test/input/similar2
test/test_similar.py
# HG changeset patch
# User Ry4an Brase <ry4an-hg@ry4an.org>
# Date 1348976856 14400
# Sat Sep 29 23:47:36 2012 -0400
# Node ID 19502af3b7c9f321aafae7f1772ac2d6bca4b714
# Parent 7a1e32ae0c60d872568382a76f3621ce72a81b31
Add --ignore-imports option to similarity checking. Closes #106534.

Additionally:
- add access to existing --ignore-docstrings option to symilar command line
- add access to new --ignore-imports option to symilar command line
- add test for existing --ignore-docstrings feature
- add test for new --ignore-imports feature

diff --git a/ChangeLog b/ChangeLog
@@ -1,9 +1,12 @@
1  ChangeLog for PyLint
2  ====================
3 
4  --
5 +    * #106534: add --ignore-imports option to code similarity checking
6 +      and 'symilar' command line tool (patch by Ry4an Brase)
7 +
8      * #104571: check for anomalous backslash escape, introducing new
9        W1401 and W1402 messages (patch by Martin Pool)
10 
11      * #100707: check for boolop being used as exception class, introducing
12        new W0711 message (patch by Tim Hatch)
diff --git a/checkers/similar.py b/checkers/similar.py
@@ -27,23 +27,25 @@
13 
14  class Similar:
15      """finds copy-pasted lines of code in a project"""
16 
17      def __init__(self, min_lines=4, ignore_comments=False,
18 -                 ignore_docstrings=False):
19 +                 ignore_docstrings=False, ignore_imports=False):
20          self.min_lines = min_lines
21          self.ignore_comments = ignore_comments
22          self.ignore_docstrings = ignore_docstrings
23 +        self.ignore_imports = ignore_imports
24          self.linesets = []
25 
26      def append_stream(self, streamid, stream):
27          """append a file to search for similarities"""
28          stream.seek(0) # XXX may be removed with astng > 0.23
29          self.linesets.append(LineSet(streamid,
30                                       stream.readlines(),
31                                       self.ignore_comments,
32 -                                     self.ignore_docstrings))
33 +                                     self.ignore_docstrings,
34 +                                     self.ignore_imports))
35 
36      def run(self):
37          """start looking for similarities and display results on stdout"""
38          self._display_sims(self._compute_sims())
39 
@@ -121,11 +123,15 @@
40          for idx, lineset in enumerate(self.linesets[:-1]):
41              for lineset2 in self.linesets[idx+1:]:
42                  for sim in self._find_common(lineset, lineset2):
43                      yield sim
44 
45 -def stripped_lines(lines, ignore_comments, ignore_docstrings):
46 +def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
47 +    """return lines with leading/trailing whitespace and any ignored code
48 +    features removed
49 +    """
50 +
51      strippedlines = []
52      docstring = None
53      for line in lines:
54          line = line.strip()
55          if ignore_docstrings:
@@ -135,25 +141,29 @@
56                  line = line[3:]
57              if docstring:
58                  if line.endswith(docstring):
59                      docstring = None
60                  line = ''
61 +        if ignore_imports:
62 +            if line.startswith("import ") or line.startswith("from "):
63 +                line = ''
64          if ignore_comments:
65              # XXX should use regex in checkers/format to avoid cutting
66              # at a "#" in a string
67              line = line.split('#', 1)[0].strip()
68          strippedlines.append(line)
69      return strippedlines
70 
71  class LineSet:
72      """Holds and indexes all the lines of a single source file"""
73      def __init__(self, name, lines, ignore_comments=False,
74 -                 ignore_docstrings=False):
75 +                 ignore_docstrings=False, ignore_imports=False):
76          self.name = name
77          self._real_lines = lines
78          self._stripped_lines = stripped_lines(lines, ignore_comments,
79 -                                              ignore_docstrings)
80 +                                              ignore_docstrings,
81 +                                              ignore_imports)
82          self._index = self._mk_index()
83 
84      def __str__(self):
85          return '<Lineset for %s>' % self.name
86 
@@ -234,10 +244,14 @@
87                  ),
88                 ('ignore-docstrings',
89                  {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
90                   'help': 'Ignore docstrings when computing similarities.'}
91                  ),
92 +               ('ignore-imports',
93 +                {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
94 +                 'help': 'Ignore imports when computing similarities.'}
95 +                ),
96                 )
97      # reports
98      reports = ( ('RP0801', 'Duplication', report_similarities), )
99 
100      def __init__(self, linter=None):
@@ -256,10 +270,12 @@
101              self.min_lines = self.config.min_similarity_lines
102          elif optname == 'ignore-comments':
103              self.ignore_comments = self.config.ignore_comments
104          elif optname == 'ignore-docstrings':
105              self.ignore_docstrings = self.config.ignore_docstrings
106 +        elif optname == 'ignore-imports':
107 +            self.ignore_imports = self.config.ignore_imports
108 
109      def open(self):
110          """init the checkers: reset linesets and statistics information"""
111          self.linesets = []
112          self.stats = self.linter.add_stats(nb_duplicated_lines=0,
@@ -300,33 +316,40 @@
113  def usage(status=0):
114      """display command line usage information"""
115      print "finds copy pasted blocks in a set of files"
116      print
117      print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
118 -[-i|--ignore-comments] file1...'
119 +[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
120      sys.exit(status)
121 
122  def Run(argv=None):
123      """standalone command line access point"""
124      if argv is None:
125          argv = sys.argv[1:]
126      from getopt import getopt
127      s_opts = 'hdi'
128 -    l_opts = ('help', 'duplicates=', 'ignore-comments')
129 +    l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
130 +              'ignore-docstrings')
131      min_lines = 4
132      ignore_comments = False
133 +    ignore_docstrings = False
134 +    ignore_imports = False
135      opts, args = getopt(argv, s_opts, l_opts)
136      for opt, val in opts:
137          if opt in ('-d', '--duplicates'):
138              min_lines = int(val)
139          elif opt in ('-h', '--help'):
140              usage()
141          elif opt in ('-i', '--ignore-comments'):
142              ignore_comments = True
143 +        elif opt in ('--ignore-docstrings'):
144 +            ignore_docstrings = True
145 +        elif opt in ('--ignore-imports'):
146 +            ignore_imports = True
147      if not args:
148          usage(1)
149 -    sim = Similar(min_lines, ignore_comments)
150 +    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
151      for filename in args:
152          sim.append_stream(filename, open(filename))
153      sim.run()
154      sys.exit(0)
155 
diff --git a/test/input/similar1 b/test/input/similar1
@@ -1,19 +1,22 @@
156 -this file is used
157 -to check the similar 
158 -command line tool
159 -
160 -see the similar2 file which is almost the
161 -same file as this one. 
162 -more than 4
163 -identical lines should
164 -be # ignore comments !
165 -detected
166 -
167 -
168 -h�h�h�h
169 +import one
170 +from two import two
171 +three
172 +four
173 +five
174 +six # comments optionally ignored
175 +seven
176 +eight
177 +nine
178 +''' ten
179 +eleven
180 +twelve '''
181 +thirteen
182 +fourteen
183 +fifteen
184 
185 
186 
187 
188 -
189 -Yo !
190 +sixteen
191 +seventeen
192 +eighteen
diff --git a/test/input/similar2 b/test/input/similar2
@@ -1,19 +1,22 @@
193 -this file is used
194 -to check the similar 
195 -command line tool
196 -
197 -see the similar1 file which is almost the
198 -same file as this one. 
199 -more than 4
200 -identical lines should
201 -be
202 -detected
203 -
204 -
205 -hohohoh
206 +import one
207 +from two import two
208 +three
209 +four
210 +five
211 +six
212 +seven
213 +eight
214 +nine
215 +''' ten
216 +ELEVEN
217 +twelve '''
218 +thirteen
219 +fourteen
220 +FIFTEEN
221 
222 
223 
224 
225 -
226 -Yo !
227 +sixteen
228 +seventeen
229 +eighteen
diff --git a/test/test_similar.py b/test/test_similar.py
@@ -22,38 +22,101 @@
230          else:
231              self.fail('not system exit')
232          finally:
233              sys.stdout = sys.__stdout__
234          self.assertMultiLineEqual(output.strip(), ("""
235 -7 similar lines in 2 files
236 -==%s:5
237 -==%s:5
238 -   same file as this one. 
239 -   more than 4
240 -   identical lines should
241 -   be
242 -   detected
243 -   
244 -   
245 -TOTAL lines=38 duplicates=7 percent=18.42
246 +10 similar lines in 2 files
247 +==%s:0
248 +==%s:0
249 +   import one
250 +   from two import two
251 +   three
252 +   four
253 +   five
254 +   six
255 +   seven
256 +   eight
257 +   nine
258 +   ''' ten
259 +TOTAL lines=44 duplicates=10 percent=22.73
260  """ % (SIMILAR1, SIMILAR2)).strip())
261 
262 
263 -    def test_dont_ignore_comments(self):
264 +    def test_ignore_docsrings(self):
265 +        sys.stdout = StringIO()
266 +        try:
267 +            similar.Run(['--ignore-docstrings', SIMILAR1, SIMILAR2])
268 +        except SystemExit, ex:
269 +            self.assertEqual(ex.code, 0)
270 +            output = sys.stdout.getvalue()
271 +        else:
272 +            self.fail('not system exit')
273 +        finally:
274 +            sys.stdout = sys.__stdout__
275 +        self.assertMultiLineEqual(output.strip(), ("""
276 +8 similar lines in 2 files
277 +==%s:6
278 +==%s:6
279 +   seven
280 +   eight
281 +   nine
282 +   ''' ten
283 +   ELEVEN
284 +   twelve '''
285 +   thirteen
286 +   fourteen
287 +
288 +5 similar lines in 2 files
289 +==%s:0
290 +==%s:0
291 +   import one
292 +   from two import two
293 +   three
294 +   four
295 +   five
296 +TOTAL lines=44 duplicates=13 percent=29.55
297 +""" % ((SIMILAR1, SIMILAR2) * 2)).strip())
298 +
299 +
300 +    def test_ignore_imports(self):
301 +        sys.stdout = StringIO()
302 +        try:
303 +            similar.Run(['--ignore-imports', SIMILAR1, SIMILAR2])
304 +        except SystemExit, ex:
305 +            self.assertEqual(ex.code, 0)
306 +            output = sys.stdout.getvalue()
307 +        else:
308 +            self.fail('not system exit')
309 +        finally:
310 +            sys.stdout = sys.__stdout__
311 +        self.assertMultiLineEqual(output.strip(), """
312 +TOTAL lines=44 duplicates=0 percent=0.00
313 +""".strip())
314 +
315 +
316 +    def test_ignore_nothing(self):
317          sys.stdout = StringIO()
318          try:
319              similar.Run([SIMILAR1, SIMILAR2])
320          except SystemExit, ex:
321              self.assertEqual(ex.code, 0)
322              output = sys.stdout.getvalue()
323          else:
324              self.fail('not system exit')
325          finally:
326              sys.stdout = sys.__stdout__
327 -        self.assertMultiLineEqual(output.strip(), """
328 -TOTAL lines=38 duplicates=0 percent=0.00
329 -        """.strip())
330 +        self.assertMultiLineEqual(output.strip(), ("""
331 +5 similar lines in 2 files
332 +==%s:0
333 +==%s:0
334 +   import one
335 +   from two import two
336 +   three
337 +   four
338 +   five
339 +TOTAL lines=44 duplicates=5 percent=11.36
340 +""" % (SIMILAR1, SIMILAR2)).strip())
341 
342      def test_help(self):
343          sys.stdout = StringIO()
344          try:
345              similar.Run(['--help'])