tahoe-lafs/misc/coding_tools/check-miscaptures.py

#! /usr/bin/python

from __future__ import print_function

import os, sys, compiler
from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function


def check_source(source):
    """Run check_thing() on `source`, using compiler.parse as the parser."""
    return check_thing(parser=compiler.parse, thing=source)

def check_file(path):
    """Run check_thing() on `path`, using compiler.parseFile as the parser."""
    return check_thing(parser=compiler.parseFile, thing=path)

def check_thing(parser, thing):
    """
    Parse `thing` with `parser` and check the resulting AST.

    If the parser raises SyntaxError, the exception instance is returned
    (not re-raised) so that the caller can tell it apart from a result
    list. Otherwise the list populated by check_ast() is returned.
    """
    try:
        tree = parser(thing)
    except SyntaxError as err:
        return err

    findings = []
    check_ast(tree, findings)
    return findings

def check_ast(ast, results):
    """
    Check a node outside a loop.

    A For, While or ListComp node is handed to check_loop(). Any other
    node is searched recursively, child by child, for such loops.
    `results` is passed through unchanged to the callees; nothing is
    returned.
    """
    if isinstance(ast, (For, While, ListComp)):
        check_loop(ast, results)
        return

    for child in ast.getChildNodes():
        # The guard has to test the child we are about to recurse into.
        # Testing `ast` here says nothing about the child, because
        # getChildNodes() has already been called on `ast` by this point.
        if isinstance(child, Node):
            check_ast(child, results)

def check_loop(ast, results):
    """
    Check a particular outer loop.

    One result (built by make_result) is appended to `results` for every
    variable that is assigned in the loop and captured by a function
    nested in it. The bodies of those functions are then searched for
    further loops via check_ast().
    """

    # A list comprehension is represented as
    #   ListComp(exprNode, [ListCompFor(...), ...])
    # so the result expression sits outside the ListCompFor node even
    # though it is logically inside the loop(s). When there are several
    # ListCompFor nodes, as in
    #   [lambda: (a,b) for a in ... for b in ...]
    # they are siblings rather than nested. Neither wart (nonobviously)
    # matters for the analysis done here.

    first_assignment = {}  # name -> lineno of its topmost assignment
    inner_functions = set()
    collect_assigned_and_nested(ast, first_assignment, inner_functions)

    for func in inner_functions:
        # Which loop-assigned names does this function capture?
        names_captured = set()
        collect_captured(func, first_assignment, names_captured, False)

        # Report against the outermost capturing function (that is where
        # a workaround would be added) and the topmost assignment of the
        # variable: one report per function per variable.
        for name in names_captured:
            finding = make_result(func, name, first_assignment[name])
            results.append(finding)

        # Skip the default-argument expressions and look through the
        # rest of the function's child nodes for further loops.
        body_start = len(func.defaults)
        for body_node in func.getChildNodes()[body_start:]:
            check_ast(body_node, results)

def collect_assigned_and_nested(ast, assigned, nested):
    """
    Collect the names assigned in this loop, not including names
    assigned in nested functions. Also collect the nodes of functions
    that are nested one level deep.

    `assigned` is a dict mapping each assigned name to the smallest line
    number at which an assignment to it was seen. `nested` is a set that
    receives the Lambda and Function nodes encountered. Both are updated
    in place; nothing is returned.
    """
    if isinstance(ast, AssName):
        # Keep only the topmost (lowest line number) assignment.
        if ast.name not in assigned or assigned[ast.name] > ast.lineno:
            assigned[ast.name] = ast.lineno
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        nested.add(ast)

        # The default argument expressions are "outside" the function,
        # even though they are children of the Lambda or Function node,
        # so they are the only children still searched here.
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Guard on the child being descended into, not on `ast`, which
        # has already had getChildNodes() called on it.
        if isinstance(child, Node):
            collect_assigned_and_nested(child, assigned, nested)

def collect_captured(ast, assigned, captured, in_function_yet):
    """
    Collect any captured variables that are also in assigned.

    `assigned` maps candidate names to line numbers and is not modified
    (a copy is made where names need to be removed). Names referenced via
    Name nodes that appear in `assigned` are added to the `captured` set.
    `in_function_yet` is False only for the initial call on the outermost
    function node, whose default argument expressions are then skipped.
    """
    if isinstance(ast, Name):
        if ast.name in assigned:
            captured.add(ast.name)
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        # Formal parameters of the function are excluded from
        # captures we care about in subnodes of the function body.
        new_assigned = assigned.copy()
        remove_argnames(ast.argnames, new_assigned)

        # Nothing left to look for means the body need not be walked.
        if len(new_assigned) > 0:
            for child in childnodes[len(ast.defaults):]:
                collect_captured(child, new_assigned, captured, True)

        # The default argument expressions are "outside" *this*
        # function, even though they are children of the Lambda or
        # Function node.
        if not in_function_yet:
            return
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Guard on the child being descended into, not on `ast`, which
        # has already had getChildNodes() called on it.
        if isinstance(child, Node):
            collect_captured(child, assigned, captured, True)


def remove_argnames(names, fromset):
    """
    Delete every name in `names` from the dict `fromset`, in place.

    `names` may contain nested tuples or lists of names (destructuring
    function arguments); these are flattened recursively. Names that are
    not keys of `fromset` are ignored. Returns None.
    """
    for element in names:
        # Test for nesting before the membership test: a list (or a tuple
        # containing one) is unhashable, so `element in fromset` would
        # raise TypeError instead of reaching the recursive branch.
        if isinstance(element, (tuple, list)):
            remove_argnames(element, fromset)
        elif element in fromset:
            del fromset[element]


def make_result(funcnode, var_name, var_lineno):
    """
    Build the result tuple for one captured variable.

    Returns (funcnode.lineno, description, var_name, var_lineno), where
    the description is 'function <repr of name>' when `funcnode` has a
    `name` attribute and '<lambda>' otherwise.
    """
    is_named = hasattr(funcnode, 'name')
    description = ('function %r' % (funcnode.name,)) if is_named else '<lambda>'
    return (funcnode.lineno, description, var_name, var_lineno)

def report(out, path, results):
    """
    Print one line per result to the file-like object `out`.

    Each result is a 4-tuple as produced by make_result(); the line is
    `path` followed by the formatted tuple.
    """
    template = ":%r %s captures %r assigned at line %d"
    for entry in results:
        line = path + (template % entry)
        print(line, file=out)

def check(sources, out):
    """
    Check every source in `sources` and write a report to `out`.

    A source that is a regular file is checked directly; anything else is
    walked with os.walk() and every file with a '.py' extension is
    checked. Per-file findings, syntax-error notices and a final summary
    are printed to `out`.

    Returns the total number of suspicious captures found.
    """
    # Mutable tally shared with the nested helper below.
    tally = {'captures': 0, 'processed': 0, 'suspect': 0, 'errors': 0}

    def _analyse(path):
        outcome = check_file(path)
        if isinstance(outcome, SyntaxError):
            # Files that fail to parse are counted separately and do not
            # contribute to the processed/suspect totals.
            print(path + (" NOT ANALYSED due to syntax error: %s" % outcome), file=out)
            tally['errors'] += 1
            return

        report(out, path, outcome)
        tally['captures'] += len(outcome)
        tally['processed'] += 1
        if len(outcome) > 0:
            tally['suspect'] += 1

    for source in sources:
        print("Checking %s..." % (source,), file=out)
        if os.path.isfile(source):
            _analyse(source)
            continue

        for (dirpath, _dirnames, filenames) in os.walk(source):
            for filename in filenames:
                extension = os.path.splitext(filename)[1]
                if extension == '.py':
                    _analyse(os.path.join(dirpath, filename))

    summary = ("%d suspiciously captured variables in %d out of %d file(s)."
               % (tally['captures'], tally['suspect'], tally['processed']))
    print(summary, file=out)
    if tally['errors'] > 0:
        error_note = ("%d file(s) not processed due to syntax errors."
                      % (tally['errors'],))
        print(error_note, file=out)
    return tally['captures']


# Check the paths given on the command line, or 'src' when none are given.
sources = sys.argv[1:] if len(sys.argv) > 1 else ['src']
if check(sources, sys.stderr) > 0:
    # Exit with a failure status when any suspicious capture was reported.
    sys.exit(1)


# TODO: self-tests
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`#! /usr/bin/python`

use print function over print statement in ./misc/ directory 2019-03-22 10:40:58 +00:00			`from __future__ import print_function`

check-miscaptures.py: check while loops and list comprehensions as well as for loops. Also fix a pyflakes warning. refs #1555 2011-10-09 04:40:22 +00:00			`import os, sys, compiler`
			`from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00

			`def check_source(source):`
			`return check_thing(compiler.parse, source)`

			`def check_file(path):`
			`return check_thing(compiler.parseFile, path)`

			`def check_thing(parser, thing):`
			`try:`
			`ast = parser(thing)`
updated all python files to use pep-3110 exception syntax for python3 compatibility 2019-03-28 11:45:28 +00:00			`except SyntaxError as e:`
check-miscaptures.py: report the number of files that were not analysed due to syntax errors (and don't count them in the number of suspicious captures). refs #1555 2011-10-09 05:03:01 +00:00			`return e`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`else:`
			`results = []`
			`check_ast(ast, results)`
			`return results`

			`def check_ast(ast, results):`
check-miscaptures.py: check while loops and list comprehensions as well as for loops. Also fix a pyflakes warning. refs #1555 2011-10-09 04:40:22 +00:00			`"""Check a node outside a loop."""`
			`if isinstance(ast, (For, While, ListComp)):`
			`check_loop(ast, results)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`else:`
			`for child in ast.getChildNodes():`
			`if isinstance(ast, Node):`
			`check_ast(child, results)`

check-miscaptures.py: check while loops and list comprehensions as well as for loops. Also fix a pyflakes warning. refs #1555 2011-10-09 04:40:22 +00:00			`def check_loop(ast, results):`
			`"""Check a particular outer loop."""`

			`# List comprehensions have a poorly designed AST of the form`
			`# ListComp(exprNode, [ListCompFor(...), ...]), in which the`
			`# result expression is outside the ListCompFor node even though`
			`# it is logically inside the loop(s).`
			`# There may be multiple ListCompFor nodes (in cases such as`
			`# [lambda: (a,b) for a in ... for b in ...]`
			`# ), and that case they are not nested in the AST. But these`
			`# warts (nonobviously) happen not to matter for our analysis.`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`assigned = {} # maps name to lineno of topmost assignment`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`nested = set()`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`collect_assigned_and_nested(ast, assigned, nested)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
			`# For each nested function...`
			`for funcnode in nested:`
			`# Check for captured variables in this function.`
			`captured = set()`
check-miscaptures.py: handle corner cases around default arguments correctly. Also make a minor optimization when there are no assigned variables to consider. refs #1555 2011-10-09 04:50:23 +00:00			`collect_captured(funcnode, assigned, captured, False)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`for name in captured:`
			`# We want to report the outermost capturing function`
			`# (since that is where the workaround will need to be`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`# added), and the topmost assignment to the variable.`
			`# Just one report per capturing function per variable`
			`# will do.`
			`results.append(make_result(funcnode, name, assigned[name]))`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
			`# Check each node in the function body in case it`
			`# contains another 'for' loop.`
			`childnodes = funcnode.getChildNodes()[len(funcnode.defaults):]`
			`for child in childnodes:`
hush somewhat-real problems found by static analysis These would have caused problems if a certain unlikely code path was taken: mostly error-handling pathways. 2017-12-30 23:23:43 +00:00			`check_ast(child, results)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`def collect_assigned_and_nested(ast, assigned, nested):`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`"""`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`Collect the names assigned in this loop, not including names`
			`assigned in nested functions. Also collect the nodes of functions`
			`that are nested one level deep.`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`"""`
			`if isinstance(ast, AssName):`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`if ast.name not in assigned or assigned[ast.name] > ast.lineno:`
			`assigned[ast.name] = ast.lineno`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`else:`
			`childnodes = ast.getChildNodes()`
			`if isinstance(ast, (Lambda, Function)):`
			`nested.add(ast)`

			`# The default argument expressions are "outside" the`
			`# function, even though they are children of the`
			`# Lambda or Function node.`
			`childnodes = childnodes[:len(ast.defaults)]`

			`for child in childnodes:`
			`if isinstance(ast, Node):`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`collect_assigned_and_nested(child, assigned, nested)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
check-miscaptures.py: handle corner cases around default arguments correctly. Also make a minor optimization when there are no assigned variables to consider. refs #1555 2011-10-09 04:50:23 +00:00			`def collect_captured(ast, assigned, captured, in_function_yet):`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`"""Collect any captured variables that are also in assigned."""`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`if isinstance(ast, Name):`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`if ast.name in assigned:`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`captured.add(ast.name)`
			`else:`
			`childnodes = ast.getChildNodes()`
			`if isinstance(ast, (Lambda, Function)):`
			`# Formal parameters of the function are excluded from`
			`# captures we care about in subnodes of the function body.`
check-miscaptures.py: Python doesn't really have declarations; report the topmost assignment. refs #1555 2011-10-09 04:48:00 +00:00			`new_assigned = assigned.copy()`
			`remove_argnames(ast.argnames, new_assigned)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
check-miscaptures.py: handle corner cases around default arguments correctly. Also make a minor optimization when there are no assigned variables to consider. refs #1555 2011-10-09 04:50:23 +00:00			`if len(new_assigned) > 0:`
			`for child in childnodes[len(ast.defaults):]:`
			`collect_captured(child, new_assigned, captured, True)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
check-miscaptures.py: handle corner cases around default arguments correctly. Also make a minor optimization when there are no assigned variables to consider. refs #1555 2011-10-09 04:50:23 +00:00			`# The default argument expressions are "outside" this`
			`# function, even though they are children of the Lambda or`
			`# Function node.`
			`if not in_function_yet:`
			`return`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`childnodes = childnodes[:len(ast.defaults)]`

			`for child in childnodes:`
			`if isinstance(ast, Node):`
check-miscaptures.py: handle corner cases around default arguments correctly. Also make a minor optimization when there are no assigned variables to consider. refs #1555 2011-10-09 04:50:23 +00:00			`collect_captured(child, assigned, captured, True)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00

check-miscaptures.py: handle destructuring function arguments correctly. refs #1555 2011-10-09 04:47:10 +00:00			`def remove_argnames(names, fromset):`
			`for element in names:`
			`if element in fromset:`
			`del fromset[element]`
			`elif isinstance(element, (tuple, list)):`
			`remove_argnames(element, fromset)`


Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`def make_result(funcnode, var_name, var_lineno):`
			`if hasattr(funcnode, 'name'):`
			`func_name = 'function %r' % (funcnode.name,)`
			`else:`
			`func_name = '<lambda>'`
			`return (funcnode.lineno, func_name, var_name, var_lineno)`

			`def report(out, path, results):`
			`for r in results:`
use print function over print statement in ./misc/ directory 2019-03-22 10:40:58 +00:00			`print(path + (":%r %s captures %r assigned at line %d" % r), file=out)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
			`def check(sources, out):`
			`class Counts:`
			`n = 0`
			`processed_files = 0`
			`suspect_files = 0`
check-miscaptures.py: report the number of files that were not analysed due to syntax errors (and don't count them in the number of suspicious captures). refs #1555 2011-10-09 05:03:01 +00:00			`error_files = 0`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`counts = Counts()`

			`def _process(path):`
			`results = check_file(path)`
check-miscaptures.py: report the number of files that were not analysed due to syntax errors (and don't count them in the number of suspicious captures). refs #1555 2011-10-09 05:03:01 +00:00			`if isinstance(results, SyntaxError):`
use print function over print statement in ./misc/ directory 2019-03-22 10:40:58 +00:00			`print(path + (" NOT ANALYSED due to syntax error: %s" % results), file=out)`
check-miscaptures.py: report the number of files that were not analysed due to syntax errors (and don't count them in the number of suspicious captures). refs #1555 2011-10-09 05:03:01 +00:00			`counts.error_files += 1`
			`else:`
			`report(out, path, results)`
			`counts.n += len(results)`
			`counts.processed_files += 1`
			`if len(results) > 0:`
			`counts.suspect_files += 1`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00
			`for source in sources:`
use print function over print statement in ./misc/ directory 2019-03-22 10:40:58 +00:00			`print("Checking %s..." % (source,), file=out)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`if os.path.isfile(source):`
			`_process(source)`
			`else:`
			`for (dirpath, dirnames, filenames) in os.walk(source):`
			`for fn in filenames:`
			`(basename, ext) = os.path.splitext(fn)`
			`if ext == '.py':`
			`_process(os.path.join(dirpath, fn))`

use print function over print statement in ./misc/ directory 2019-03-22 10:40:58 +00:00			`print("%d suspiciously captured variables in %d out of %d file(s)."`
			`% (counts.n, counts.suspect_files, counts.processed_files), file=out)`
check-miscaptures.py: report the number of files that were not analysed due to syntax errors (and don't count them in the number of suspicious captures). refs #1555 2011-10-09 05:03:01 +00:00			`if counts.error_files > 0:`
use print function over print statement in ./misc/ directory 2019-03-22 10:40:58 +00:00			`print("%d file(s) not processed due to syntax errors."`
			`% (counts.error_files,), file=out)`
Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 2011-10-07 07:41:21 +00:00			`return counts.n`


			`sources = ['src']`
			`if len(sys.argv) > 1:`
			`sources = sys.argv[1:]`
			`if check(sources, sys.stderr) > 0:`
			`sys.exit(1)`


			`# TODO: self-tests`