Attached Documents

add --ignore-imports to code duplication (R0801) / symilar (ignore-similar-imports.txt)

add --ignore-imports to code duplication (R0801) / symilar (ignore-similar-imports.txt)

This patch adds the requested functionality. It was downloaded from https://bitbucket.org/Ry4an/pylint-ignore-similar-imports/raw/tip/ignore-similar-imports where it was also submitted as a pull request.

download

# HG changeset patch
# Parent 7a1e32ae0c60d872568382a76f3621ce72a81b31
Add --ignore-imports option to similarity checking. Closes #106534.

Additionally:
- add access to existing --ignore-docstrings option to symilar command line
- add access to new --ignore-imports option to symilar command line
- add test for existing --ignore-docstrings feature
- add test for new --ignore-imports feature

diff -r 7a1e32ae0c60 ChangeLog
--- a/ChangeLog Thu Sep 20 06:54:35 2012 +0200
+++ b/ChangeLog Sat Sep 29 23:47:36 2012 -0400
@@ -2,6 +2,9 @@
====================

--
+ * #106534: add --ignore-imports option to code similarity checking
+ and 'symilar' command line tool (patch by Ry4an Brase)
+
* #104571: check for anomalous backslash escape, introducing new
W1401 and W1402 messages (patch by Martin Pool)

diff -r 7a1e32ae0c60 checkers/similar.py
--- a/checkers/similar.py Thu Sep 20 06:54:35 2012 +0200
+++ b/checkers/similar.py Sat Sep 29 23:47:36 2012 -0400
@@ -29,10 +29,11 @@
"""finds copy-pasted lines of code in a project"""

def __init__(self, min_lines=4, ignore_comments=False,
- ignore_docstrings=False):
+ ignore_docstrings=False, ignore_imports=False):
self.min_lines = min_lines
self.ignore_comments = ignore_comments
self.ignore_docstrings = ignore_docstrings
+ self.ignore_imports = ignore_imports
self.linesets = []

def append_stream(self, streamid, stream):
@@ -41,7 +42,8 @@
self.linesets.append(LineSet(streamid,
stream.readlines(),
self.ignore_comments,
- self.ignore_docstrings))
+ self.ignore_docstrings,
+ self.ignore_imports))

def run(self):
"""start looking for similarities and display results on stdout"""
@@ -123,7 +125,11 @@
for sim in self._find_common(lineset, lineset2):
yield sim

-def stripped_lines(lines, ignore_comments, ignore_docstrings):
+def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
+ """return lines with leading/trailing whitespace and any ignored code
+ features removed
+ """
+
strippedlines = []
docstring = None
for line in lines:
@@ -137,6 +143,9 @@
if line.endswith(docstring):
docstring = None
line = ''
+ if ignore_imports:
+ if line.startswith("import ") or line.startswith("from "):
+ line = ''
if ignore_comments:
# XXX should use regex in checkers/format to avoid cutting
# at a "#" in a string
@@ -147,11 +156,12 @@
class LineSet:
"""Holds and indexes all the lines of a single source file"""
def __init__(self, name, lines, ignore_comments=False,
- ignore_docstrings=False):
+ ignore_docstrings=False, ignore_imports=False):
self.name = name
self._real_lines = lines
self._stripped_lines = stripped_lines(lines, ignore_comments,
- ignore_docstrings)
+ ignore_docstrings,
+ ignore_imports)
self._index = self._mk_index()

def __str__(self):
@@ -236,6 +246,10 @@
{'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
'help': 'Ignore docstrings when computing similarities.'}
),
+ ('ignore-imports',
+ {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
+ 'help': 'Ignore imports when computing similarities.'}
+ ),
)
# reports
reports = ( ('RP0801', 'Duplication', report_similarities), )
@@ -258,6 +272,8 @@
self.ignore_comments = self.config.ignore_comments
elif optname == 'ignore-docstrings':
self.ignore_docstrings = self.config.ignore_docstrings
+ elif optname == 'ignore-imports':
+ self.ignore_imports = self.config.ignore_imports

def open(self):
"""init the checkers: reset linesets and statistics information"""
@@ -302,7 +318,7 @@
print "finds copy pasted blocks in a set of files"
print
print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
-[-i|--ignore-comments] file1...'
+[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
sys.exit(status)

def Run(argv=None):
@@ -311,9 +327,12 @@
argv = sys.argv[1:]
from getopt import getopt
s_opts = 'hdi'
- l_opts = ('help', 'duplicates=', 'ignore-comments')
+ l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
+ 'ignore-docstrings')
min_lines = 4
ignore_comments = False
+ ignore_docstrings = False
+ ignore_imports = False
opts, args = getopt(argv, s_opts, l_opts)
for opt, val in opts:
if opt in ('-d', '--duplicates'):
@@ -322,9 +341,13 @@
usage()
elif opt in ('-i', '--ignore-comments'):
ignore_comments = True
+ elif opt in ('--ignore-docstrings'):
+ ignore_docstrings = True
+ elif opt in ('--ignore-imports'):
+ ignore_imports = True
if not args:
usage(1)
- sim = Similar(min_lines, ignore_comments)
+ sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
for filename in args:
sim.append_stream(filename, open(filename))
sim.run()
diff -r 7a1e32ae0c60 test/input/similar1
--- a/test/input/similar1 Thu Sep 20 06:54:35 2012 +0200
+++ b/test/input/similar1 Sat Sep 29 23:47:36 2012 -0400
@@ -1,19 +1,22 @@
-this file is used
-to check the similar
-command line tool
+import one
+from two import two
+three
+four
+five
+six # comments optionally ignored
+seven
+eight
+nine
+''' ten
+eleven
+twelve '''
+thirteen
+fourteen
+fifteen

-see the similar2 file which is almost the
-same file as this one.
-more than 4
-identical lines should
-be # ignore comments !
-detected

-héhéhéh

-
-
-
-
-Yo !
+sixteen
+seventeen
+eighteen
diff -r 7a1e32ae0c60 test/input/similar2
--- a/test/input/similar2 Thu Sep 20 06:54:35 2012 +0200
+++ b/test/input/similar2 Sat Sep 29 23:47:36 2012 -0400
@@ -1,19 +1,22 @@
-this file is used
-to check the similar
-command line tool
+import one
+from two import two
+three
+four
+five
+six
+seven
+eight
+nine
+''' ten
+ELEVEN
+twelve '''
+thirteen
+fourteen
+FIFTEEN

-see the similar1 file which is almost the
-same file as this one.
-more than 4
-identical lines should
-be
-detected

-hohohoh

-
-
-
-
-Yo !
+sixteen
+seventeen
+eighteen
diff -r 7a1e32ae0c60 test/test_similar.py
--- a/test/test_similar.py Thu Sep 20 06:54:35 2012 +0200
+++ b/test/test_similar.py Sat Sep 29 23:47:36 2012 -0400
@@ -24,21 +24,76 @@
finally:
sys.stdout = sys.__stdout__
self.assertMultiLineEqual(output.strip(), ("""
-7 similar lines in 2 files
-==%s:5
-==%s:5
- same file as this one.
- more than 4
- identical lines should
- be
- detected
-
-
-TOTAL lines=38 duplicates=7 percent=18.42
+10 similar lines in 2 files
+==%s:0
+==%s:0
+ import one
+ from two import two
+ three
+ four
+ five
+ six
+ seven
+ eight
+ nine
+ ''' ten
+TOTAL lines=44 duplicates=10 percent=22.73
""" % (SIMILAR1, SIMILAR2)).strip())

- def test_dont_ignore_comments(self):
+ def test_ignore_docsrings(self):
+ sys.stdout = StringIO()
+ try:
+ similar.Run(['--ignore-docstrings', SIMILAR1, SIMILAR2])
+ except SystemExit, ex:
+ self.assertEqual(ex.code, 0)
+ output = sys.stdout.getvalue()
+ else:
+ self.fail('not system exit')
+ finally:
+ sys.stdout = sys.__stdout__
+ self.assertMultiLineEqual(output.strip(), ("""
+8 similar lines in 2 files
+==%s:6
+==%s:6
+ seven
+ eight
+ nine
+ ''' ten
+ ELEVEN
+ twelve '''
+ thirteen
+ fourteen
+
+5 similar lines in 2 files
+==%s:0
+==%s:0
+ import one
+ from two import two
+ three
+ four
+ five
+TOTAL lines=44 duplicates=13 percent=29.55
+""" % ((SIMILAR1, SIMILAR2) * 2)).strip())
+
+
+ def test_ignore_imports(self):
+ sys.stdout = StringIO()
+ try:
+ similar.Run(['--ignore-imports', SIMILAR1, SIMILAR2])
+ except SystemExit, ex:
+ self.assertEqual(ex.code, 0)
+ output = sys.stdout.getvalue()
+ else:
+ self.fail('not system exit')
+ finally:
+ sys.stdout = sys.__stdout__
+ self.assertMultiLineEqual(output.strip(), """
+TOTAL lines=44 duplicates=0 percent=0.00
+""".strip())
+
+
+ def test_ignore_nothing(self):
sys.stdout = StringIO()
try:
similar.Run([SIMILAR1, SIMILAR2])
@@ -49,9 +104,17 @@
self.fail('not system exit')
finally:
sys.stdout = sys.__stdout__
- self.assertMultiLineEqual(output.strip(), """
-TOTAL lines=38 duplicates=0 percent=0.00
- """.strip())
+ self.assertMultiLineEqual(output.strip(), ("""
+5 similar lines in 2 files
+==%s:0
+==%s:0
+ import one
+ from two import two
+ three
+ four
+ five
+TOTAL lines=44 duplicates=5 percent=11.36
+""" % (SIMILAR1, SIMILAR2)).strip())

def test_help(self):
sys.stdout = StringIO()