Switch to pylint instead of custom AST parsing that wouldn't work on Python 3.

This commit is contained in:
Itamar Turner-Trauring 2021-06-14 11:24:10 -04:00
parent 744f9bab4e
commit 28220d8a16
2 changed files with 23 additions and 200 deletions

View File

@ -1,186 +0,0 @@
#! /usr/bin/python
from __future__ import print_function
import os, sys, compiler
from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function
def check_source(source):
return check_thing(compiler.parse, source)
def check_file(path):
return check_thing(compiler.parseFile, path)
def check_thing(parser, thing):
try:
ast = parser(thing)
except SyntaxError as e:
return e
else:
results = []
check_ast(ast, results)
return results
def check_ast(ast, results):
"""Check a node outside a loop."""
if isinstance(ast, (For, While, ListComp)):
check_loop(ast, results)
else:
for child in ast.getChildNodes():
if isinstance(ast, Node):
check_ast(child, results)
def check_loop(ast, results):
"""Check a particular outer loop."""
# List comprehensions have a poorly designed AST of the form
# ListComp(exprNode, [ListCompFor(...), ...]), in which the
# result expression is outside the ListCompFor node even though
# it is logically inside the loop(s).
# There may be multiple ListCompFor nodes (in cases such as
# [lambda: (a,b) for a in ... for b in ...]
# ), and that case they are not nested in the AST. But these
# warts (nonobviously) happen not to matter for our analysis.
assigned = {} # maps name to lineno of topmost assignment
nested = set()
collect_assigned_and_nested(ast, assigned, nested)
# For each nested function...
for funcnode in nested:
# Check for captured variables in this function.
captured = set()
collect_captured(funcnode, assigned, captured, False)
for name in captured:
# We want to report the outermost capturing function
# (since that is where the workaround will need to be
# added), and the topmost assignment to the variable.
# Just one report per capturing function per variable
# will do.
results.append(make_result(funcnode, name, assigned[name]))
# Check each node in the function body in case it
# contains another 'for' loop.
childnodes = funcnode.getChildNodes()[len(funcnode.defaults):]
for child in childnodes:
check_ast(child, results)
def collect_assigned_and_nested(ast, assigned, nested):
"""
Collect the names assigned in this loop, not including names
assigned in nested functions. Also collect the nodes of functions
that are nested one level deep.
"""
if isinstance(ast, AssName):
if ast.name not in assigned or assigned[ast.name] > ast.lineno:
assigned[ast.name] = ast.lineno
else:
childnodes = ast.getChildNodes()
if isinstance(ast, (Lambda, Function)):
nested.add(ast)
# The default argument expressions are "outside" the
# function, even though they are children of the
# Lambda or Function node.
childnodes = childnodes[:len(ast.defaults)]
for child in childnodes:
if isinstance(ast, Node):
collect_assigned_and_nested(child, assigned, nested)
def collect_captured(ast, assigned, captured, in_function_yet):
"""Collect any captured variables that are also in assigned."""
if isinstance(ast, Name):
if ast.name in assigned:
captured.add(ast.name)
else:
childnodes = ast.getChildNodes()
if isinstance(ast, (Lambda, Function)):
# Formal parameters of the function are excluded from
# captures we care about in subnodes of the function body.
new_assigned = assigned.copy()
remove_argnames(ast.argnames, new_assigned)
if len(new_assigned) > 0:
for child in childnodes[len(ast.defaults):]:
collect_captured(child, new_assigned, captured, True)
# The default argument expressions are "outside" *this*
# function, even though they are children of the Lambda or
# Function node.
if not in_function_yet:
return
childnodes = childnodes[:len(ast.defaults)]
for child in childnodes:
if isinstance(ast, Node):
collect_captured(child, assigned, captured, True)
def remove_argnames(names, fromset):
for element in names:
if element in fromset:
del fromset[element]
elif isinstance(element, (tuple, list)):
remove_argnames(element, fromset)
def make_result(funcnode, var_name, var_lineno):
if hasattr(funcnode, 'name'):
func_name = 'function %r' % (funcnode.name,)
else:
func_name = '<lambda>'
return (funcnode.lineno, func_name, var_name, var_lineno)
def report(out, path, results):
for r in results:
print(path + (":%r %s captures %r assigned at line %d" % r), file=out)
def check(sources, out):
class Counts(object):
n = 0
processed_files = 0
suspect_files = 0
error_files = 0
counts = Counts()
def _process(path):
results = check_file(path)
if isinstance(results, SyntaxError):
print(path + (" NOT ANALYSED due to syntax error: %s" % results), file=out)
counts.error_files += 1
else:
report(out, path, results)
counts.n += len(results)
counts.processed_files += 1
if len(results) > 0:
counts.suspect_files += 1
for source in sources:
print("Checking %s..." % (source,), file=out)
if os.path.isfile(source):
_process(source)
else:
for (dirpath, dirnames, filenames) in os.walk(source):
for fn in filenames:
(basename, ext) = os.path.splitext(fn)
if ext == '.py':
_process(os.path.join(dirpath, fn))
print("%d suspiciously captured variables in %d out of %d file(s)."
% (counts.n, counts.suspect_files, counts.processed_files), file=out)
if counts.error_files > 0:
print("%d file(s) not processed due to syntax errors."
% (counts.error_files,), file=out)
return counts.n
sources = ['src']
if len(sys.argv) > 1:
sources = sys.argv[1:]
if check(sources, sys.stderr) > 0:
sys.exit(1)
# TODO: self-tests

37
tox.ini
View File

@ -109,12 +109,10 @@ commands =
coverage report
# Once 2.7 is dropped, this can be removed. It just does flake8 with Python 2
# since that can give different results than flake8 on Python 3.
[testenv:codechecks]
basepython = python2.7
# On macOS, git inside of towncrier needs $HOME.
passenv = HOME
whitelist_externals =
/bin/mv
setenv =
# Workaround an error when towncrier is run under the VCS hook,
# https://stackoverflow.com/a/4027726/624787:
@ -128,26 +126,37 @@ setenv =
DEFAULT_FILES=src integration static misc setup.py
commands =
flake8 {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/check-umids.py {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/check-debugging.py {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/find-trailing-spaces.py -r {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/check-miscaptures.py {posargs:{env:DEFAULT_FILES}}
# If towncrier.check fails, you forgot to add a towncrier news
# fragment explaining the change in this branch. Create one at
# `newsfragments/<ticket>.<change type>` with some text for the news
# file. See towncrier.toml for legal <change type> values.
python -m towncrier.check --config towncrier.toml
[testenv:codechecks3]
basepython = python3
deps =
# Newer versions of PyLint have buggy configuration
# (https://github.com/PyCQA/pylint/issues/4574), so stick to old version
# for now.
pylint < 2.5
# On macOS, git inside of towncrier needs $HOME.
passenv = HOME
whitelist_externals =
/bin/mv
setenv =
# If no positional arguments are given, try to run the checks on the
# entire codebase, including various pieces of supporting code.
DEFAULT_FILES=src integration static misc setup.py
commands =
flake8 {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/check-umids.py {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/check-debugging.py {posargs:{env:DEFAULT_FILES}}
python misc/coding_tools/find-trailing-spaces.py -r {posargs:{env:DEFAULT_FILES}}
# PyLint has other useful checks, might want to enable them:
# http://pylint.pycqa.org/en/latest/technical_reference/features.html
pylint --disable=all --enable=cell-var-from-loop {posargs:{env:DEFAULT_FILES}}
# If towncrier.check fails, you forgot to add a towncrier news
# fragment explaining the change in this branch. Create one at
# `newsfragments/<ticket>.<change type>` with some text for the news
# file. See towncrier.toml for legal <change type> values.
python -m towncrier.check --config towncrier.toml
[testenv:typechecks]