2011-10-07 07:41:21 +00:00
|
|
|
#! /usr/bin/python
|
|
|
|
|
2019-03-22 11:40:58 +01:00
|
|
|
from __future__ import print_function
|
|
|
|
|
2011-10-09 04:40:22 +00:00
|
|
|
import os, sys, compiler
|
|
|
|
from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function
|
2011-10-07 07:41:21 +00:00
|
|
|
|
|
|
|
|
|
|
|
def check_source(source):
    """
    Check Python source text for suspicious variable captures.

    Delegates to check_thing() with compiler.parse as the parser, and
    returns whatever check_thing() returns for 'source'.
    """
    parse_text = compiler.parse
    return check_thing(parse_text, source)
|
|
|
|
|
|
|
|
def check_file(path):
    """
    Check the Python file at 'path' for suspicious variable captures.

    Delegates to check_thing() with compiler.parseFile as the parser, and
    returns whatever check_thing() returns for 'path'.
    """
    parse_path = compiler.parseFile
    return check_thing(parse_path, path)
|
|
|
|
|
|
|
|
def check_thing(parser, thing):
    """
    Parse 'thing' with the callable 'parser' and check the resulting tree.

    If the parser raises SyntaxError, the exception object itself is
    returned (it is not re-raised). Otherwise a fresh list is passed to
    check_ast() together with the parsed tree, and that list is returned.
    """
    try:
        tree = parser(thing)
    except SyntaxError as err:
        # Hand the error back as a value so the caller can report it.
        return err

    findings = []
    check_ast(tree, findings)
    return findings
|
|
|
|
|
|
|
|
def check_ast(ast, results):
    """
    Check a node outside a loop.

    A For, While or ListComp node is handed to check_loop(). For any other
    node the search continues recursively in its child nodes. 'results' is
    passed through unchanged to the callee that records findings.
    """
    if isinstance(ast, (For, While, ListComp)):
        check_loop(ast, results)
        return

    for child in ast.getChildNodes():
        # Only descend into children that are themselves AST nodes; the
        # guard must look at the child, not at the (loop-invariant) parent.
        if isinstance(child, Node):
            check_ast(child, results)
|
|
|
|
|
2011-10-09 04:40:22 +00:00
|
|
|
def check_loop(ast, results):
    """
    Check a particular outer loop.

    Every function found one level deep inside the loop is examined for
    names that are assigned in the loop; one result per (function, name)
    pair is appended to 'results'. Each such function's body is then
    checked in turn, in case it contains further loops.
    """

    # List comprehensions have a poorly designed AST of the form
    # ListComp(exprNode, [ListCompFor(...), ...]), in which the
    # result expression is outside the ListCompFor node even though
    # it is logically inside the loop(s).
    # There may be multiple ListCompFor nodes (in cases such as
    #   [lambda: (a,b) for a in ... for b in ...]
    # ), and in that case they are not nested in the AST. But these
    # warts (nonobviously) happen not to matter for our analysis.

    first_assignment = {}  # maps name to lineno of topmost assignment
    inner_functions = set()
    collect_assigned_and_nested(ast, first_assignment, inner_functions)

    for func in inner_functions:
        # Names used inside this function that the loop assigns.
        names_captured = set()
        collect_captured(func, first_assignment, names_captured, False)

        # We want to report the outermost capturing function (since that
        # is where the workaround will need to be added), and the topmost
        # assignment to the variable. Just one report per capturing
        # function per variable will do.
        results.extend([
            make_result(func, name, first_assignment[name])
            for name in names_captured
        ])

        # Check each node in the function body in case it contains
        # another 'for' loop.
        # NOTE(review): this slice assumes getChildNodes() lists exactly the
        # default-argument expressions before the body -- confirm that this
        # also holds for decorated functions.
        body_nodes = func.getChildNodes()[len(func.defaults):]
        for node in body_nodes:
            check_ast(node, results)
|
2011-10-07 07:41:21 +00:00
|
|
|
|
2011-10-09 04:48:00 +00:00
|
|
|
def collect_assigned_and_nested(ast, assigned, nested):
    """
    Collect the names assigned in this loop, not including names
    assigned in nested functions. Also collect the nodes of functions
    that are nested one level deep.

    Both containers are updated in place and nothing is returned:
    'assigned' maps each assigned name to the lowest line number at which
    an assignment to it was seen, and 'nested' is a set that receives the
    Lambda/Function nodes encountered.
    """
    if isinstance(ast, AssName):
        # Keep only the topmost (lowest line number) assignment per name.
        if ast.name not in assigned or assigned[ast.name] > ast.lineno:
            assigned[ast.name] = ast.lineno
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        nested.add(ast)

        # The default argument expressions are "outside" the
        # function, even though they are children of the
        # Lambda or Function node, so only those are searched further.
        # NOTE(review): assumes the defaults come first in getChildNodes();
        # confirm for decorated functions.
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Guard on the child being visited rather than on the parent.
        if isinstance(child, Node):
            collect_assigned_and_nested(child, assigned, nested)
|
2011-10-07 07:41:21 +00:00
|
|
|
|
2011-10-09 04:50:23 +00:00
|
|
|
def collect_captured(ast, assigned, captured, in_function_yet):
    """
    Collect any captured variables that are also in assigned.

    'captured' is a set that is updated in place with every Name found
    under 'ast' whose name is a key of 'assigned'. 'in_function_yet' is
    False only for the initial call on the outermost function node; in
    that case the function's default-argument expressions are skipped.
    All recursive calls pass True. Nothing is returned.
    """
    if isinstance(ast, Name):
        if ast.name in assigned:
            captured.add(ast.name)
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        # Formal parameters of the function are excluded from
        # captures we care about in subnodes of the function body.
        new_assigned = assigned.copy()
        remove_argnames(ast.argnames, new_assigned)

        # Nothing left to look for means the body need not be searched.
        if new_assigned:
            for child in childnodes[len(ast.defaults):]:
                collect_captured(child, new_assigned, captured, True)

        # The default argument expressions are "outside" *this*
        # function, even though they are children of the Lambda or
        # Function node.
        if not in_function_yet:
            return
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Guard on the child being visited rather than on the parent.
        if isinstance(child, Node):
            collect_captured(child, assigned, captured, True)
|
2011-10-07 07:41:21 +00:00
|
|
|
|
|
|
|
|
2011-10-09 04:47:10 +00:00
|
|
|
def remove_argnames(names, fromset):
    """
    Delete every argument name found in 'names' from 'fromset', in place.

    'names' is an iterable whose elements are either names or nested
    tuples/lists of names (presumably tuple-unpacking parameters); nested
    groups are processed recursively. Despite its name, 'fromset' is used
    as a mapping: entries are removed with 'del'. Names that are not
    present are ignored. Returns None.
    """
    for element in names:
        # Handle nested groups before the membership test: a list is
        # unhashable, so looking it up in a dict would raise TypeError.
        if isinstance(element, (tuple, list)):
            remove_argnames(element, fromset)
        elif element in fromset:
            del fromset[element]
|
|
|
|
|
|
|
|
|
2011-10-07 07:41:21 +00:00
|
|
|
def make_result(funcnode, var_name, var_lineno):
    """
    Build the result tuple for one captured variable.

    Returns (funcnode.lineno, description, var_name, var_lineno), where
    the description is "function <repr of name>" when 'funcnode' has a
    'name' attribute and '<lambda>' otherwise.
    """
    is_named = hasattr(funcnode, 'name')
    label = ('function %r' % (funcnode.name,)) if is_named else '<lambda>'
    return (funcnode.lineno, label, var_name, var_lineno)
|
|
|
|
|
|
|
|
def report(out, path, results):
    """
    Print one line per result to the file-like object 'out'.

    Each entry of 'results' is a 4-tuple as produced by make_result():
    (function lineno, function description, variable name, assignment
    lineno). Every line is prefixed with 'path'.
    """
    template = ":%r %s captures %r assigned at line %d"
    for entry in results:
        line = path + (template % entry)
        print(line, file=out)
|
2011-10-07 07:41:21 +00:00
|
|
|
|
|
|
|
def check(sources, out):
    """
    Check every path in 'sources' and write a report to 'out'.

    A path that is a regular file is checked directly; any other path is
    walked with os.walk() and every file whose extension is '.py' is
    checked. Progress lines, per-file findings and a final summary are
    printed to 'out'. Files that fail to parse are reported and counted
    separately. Returns the total number of suspicious captures found.
    """
    # Mutable tallies shared with the nested helper below.
    tally = {
        'captures': 0,
        'processed': 0,
        'suspect': 0,
        'errors': 0,
    }

    def _scan(path):
        outcome = check_file(path)
        if isinstance(outcome, SyntaxError):
            print(path + (" NOT ANALYSED due to syntax error: %s" % outcome), file=out)
            tally['errors'] += 1
            return

        report(out, path, outcome)
        found = len(outcome)
        tally['captures'] += found
        tally['processed'] += 1
        if found > 0:
            tally['suspect'] += 1

    for source in sources:
        print("Checking %s..." % (source,), file=out)
        if os.path.isfile(source):
            _scan(source)
            continue

        for (dirpath, _dirnames, filenames) in os.walk(source):
            for filename in filenames:
                if os.path.splitext(filename)[1] == '.py':
                    _scan(os.path.join(dirpath, filename))

    print("%d suspiciously captured variables in %d out of %d file(s)."
          % (tally['captures'], tally['suspect'], tally['processed']), file=out)
    if tally['errors'] > 0:
        print("%d file(s) not processed due to syntax errors."
              % (tally['errors'],), file=out)
    return tally['captures']
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry: check the paths given on the command line, or 'src' when
# none are given, reporting to stderr. Exit with status 1 if any
# suspicious capture was found.
sources = sys.argv[1:] or ['src']
if check(sources, sys.stderr) > 0:
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: self-tests
|