tahoe-lafs/src/allmydata/util/figleaf.py

401 lines
10 KiB
Python

#! /usr/bin/env python
"""
figleaf is another tool to trace code coverage (yes, in Python ;).
figleaf uses the sys.settrace hook to record which statements are
executed by the CPython interpreter; this record can then be saved
into a file, or otherwise communicated back to a reporting script.
figleaf differs from the gold standard of coverage tools
('coverage.py') in several ways. First and foremost, figleaf uses the
same criterion for "interesting" lines of code as the sys.settrace
function, which obviates some of the complexity in coverage.py (but
does mean that your "loc" count goes down). Second, figleaf does not
record code executed in the Python standard library, which results in
a significant speedup. And third, the format in which the coverage
format is saved is very simple and easy to work with.
You might want to use figleaf if you're recording coverage from
multiple types of tests and need to aggregate the coverage in
interesting ways, and/or control when coverage is recorded.
coverage.py is a better choice for command-line execution, and its
reporting is a fair bit nicer.
Command line usage: ::
figleaf.py <python file to execute> <args to python file>
The figleaf output is saved into the file '.figleaf', which is an
*aggregate* of coverage reports from all figleaf runs from this
directory. '.figleaf' contains a pickled dictionary of sets; the keys
are source code filenames, and the sets contain all line numbers
executed by the Python interpreter. See the docs or command-line
programs in bin/ for more information.
High level API: ::
* ``start(ignore_lib=True)`` -- start recording code coverage.
* ``stop()`` -- stop recording code coverage.
* ``get_trace_obj()`` -- return the (singleton) trace object.
* ``get_info()`` -- get the coverage dictionary
Classes & functions worth knowing about, i.e. a lower level API:
* ``get_lines(fp)`` -- return the set of interesting lines in the fp.
* ``combine_coverage(d1, d2)`` -- combine coverage info from two dicts.
* ``read_coverage(filename)`` -- load the coverage dictionary
* ``write_coverage(filename)`` -- write the coverage out.
* ``annotate_coverage(...)`` -- annotate a Python file with its coverage info.
Known problems:
-- module docstrings are *covered* but not found.
AUTHOR: C. Titus Brown, titus@idyll.org
'figleaf' is Copyright (C) 2006. It will be released under the BSD license.
"""
import sys
import os
import threading
from cPickle import dump, load
### import builtin sets if in > 2.4, otherwise use 'sets' module.
# we require 2.4 or later
assert set
from token import tok_name, NEWLINE, STRING, INDENT, DEDENT, COLON
import parser, types, symbol
def get_token_name(x):
"""
Utility to help pretty-print AST symbols/Python tokens.
"""
if symbol.sym_name.has_key(x):
return symbol.sym_name[x]
return tok_name.get(x, '-')
class LineGrabber:
"""
Count 'interesting' lines of Python in source files, where
'interesting' is defined as 'lines that could possibly be
executed'.
@CTB this badly needs to be refactored... once I have automated
tests ;)
"""
def __init__(self, fp):
"""
Count lines of code in 'fp'.
"""
self.lines = set()
self.ast = parser.suite(fp.read())
self.tree = parser.ast2tuple(self.ast, True)
self.find_terminal_nodes(self.tree)
def find_terminal_nodes(self, tup):
"""
Recursively eat an AST in tuple form, finding the first line
number for "interesting" code.
"""
(sym, rest) = tup[0], tup[1:]
line_nos = set()
if type(rest[0]) == types.TupleType: ### node
for x in rest:
token_line_no = self.find_terminal_nodes(x)
if token_line_no is not None:
line_nos.add(token_line_no)
if symbol.sym_name[sym] in ('stmt', 'suite', 'lambdef',
'except_clause') and line_nos:
# store the line number that this statement started at
self.lines.add(min(line_nos))
elif symbol.sym_name[sym] in ('if_stmt',):
# add all lines under this
self.lines.update(line_nos)
elif symbol.sym_name[sym] in ('global_stmt',): # IGNORE
return
else:
if line_nos:
return min(line_nos)
else: ### leaf
if sym not in (NEWLINE, STRING, INDENT, DEDENT, COLON) and \
tup[1] != 'else':
return tup[2]
return None
def pretty_print(self, tup=None, indent=0):
"""
Pretty print the AST.
"""
if tup is None:
tup = self.tree
s = tup[1]
if type(s) == types.TupleType:
print ' '*indent, get_token_name(tup[0])
for x in tup[1:]:
self.pretty_print(x, indent+1)
else:
print ' '*indent, get_token_name(tup[0]), tup[1:]
def get_lines(fp):
"""
Return the set of interesting lines in the source code read from
this file handle.
"""
l = LineGrabber(fp)
return l.lines
class CodeTracer:
"""
Basic code coverage tracking, using sys.settrace.
"""
def __init__(self, ignore_prefixes=[]):
self.c = {}
self.started = False
self.ignore_prefixes = ignore_prefixes
def start(self):
"""
Start recording.
"""
if not self.started:
self.started = True
sys.settrace(self.g)
if hasattr(threading, 'settrace'):
threading.settrace(self.g)
def stop(self):
if self.started:
sys.settrace(None)
if hasattr(threading, 'settrace'):
threading.settrace(None)
self.started = False
def g(self, f, e, a):
"""
global trace function.
"""
if e is 'call':
for p in self.ignore_prefixes:
if f.f_code.co_filename.startswith(p):
return
return self.t
def t(self, f, e, a):
"""
local trace function.
"""
if e is 'line':
self.c[(f.f_code.co_filename, f.f_lineno)] = 1
return self.t
def clear(self):
"""
wipe out coverage info
"""
self.c = {}
def gather_files(self):
"""
Return the dictionary of lines of executed code; the dict
contains items (k, v), where 'k' is the filename and 'v'
is a set of line numbers.
"""
files = {}
for (filename, line) in self.c.keys():
d = files.get(filename, set())
d.add(line)
files[filename] = d
return files
def combine_coverage(d1, d2):
"""
Given two coverage dictionaries, combine the recorded coverage
and return a new dictionary.
"""
keys = set(d1.keys())
keys.update(set(d2.keys()))
new_d = {}
for k in keys:
v = d1.get(k, set())
v2 = d2.get(k, set())
s = set(v)
s.update(v2)
new_d[k] = s
return new_d
def write_coverage(filename, combine=True):
"""
Write the current coverage info out to the given filename. If
'combine' is false, destroy any previously recorded coverage info.
"""
if _t is None:
return
d = _t.gather_files()
# combine?
if combine:
old = {}
fp = None
try:
fp = open(filename)
except IOError:
pass
if fp:
old = load(fp)
fp.close()
d = combine_coverage(d, old)
# ok, save.
outfp = open(filename, 'w')
try:
dump(d, outfp)
finally:
outfp.close()
def read_coverage(filename):
"""
Read a coverage dictionary in from the given file.
"""
fp = open(filename)
try:
d = load(fp)
finally:
fp.close()
return d
def annotate_coverage(in_fp, out_fp, covered, all_lines,
mark_possible_lines=False):
"""
A simple example coverage annotator that outputs text.
"""
for i, line in enumerate(in_fp):
i = i + 1
if i in covered:
symbol = '>'
elif i in all_lines:
symbol = '!'
else:
symbol = ' '
symbol2 = ''
if mark_possible_lines:
symbol2 = ' '
if i in all_lines:
symbol2 = '-'
out_fp.write('%s%s %s' % (symbol, symbol2, line,))
#######################
#
# singleton functions/top-level API
#
_t = None
def start(ignore_python_lib=True, ignore_prefixes=[]):
"""
Start tracing code coverage. If 'ignore_python_lib' is True,
ignore all files that live below the same directory as the 'os'
module.
"""
global _t
if _t is None:
ignore_prefixes = ignore_prefixes[:]
if ignore_python_lib:
ignore_prefixes.append(os.path.realpath(os.path.dirname(os.__file__)))
_t = CodeTracer(ignore_prefixes)
_t.start()
def stop():
"""
Stop tracing code coverage.
"""
global _t
if _t is not None:
_t.stop()
def get_trace_obj():
"""
Return the (singleton) trace object, if it exists.
"""
return _t
def get_info():
"""
Get the coverage dictionary from the trace object.
"""
if _t:
return _t.gather_files()
#############
def display_ast():
l = LineGrabber(open(sys.argv[1]))
l.pretty_print()
def main():
"""
Execute the given Python file with coverage, making it look like it is
__main__.
"""
ignore_pylibs = False
def print_help():
print 'Usage: figleaf [-i] <program-to-profile> <program-options>'
print ''
print 'Options:'
print ' -i Ignore Python standard libraries when calculating coverage'
args = sys.argv[1:]
if len(args) < 1:
print_help()
raise SystemExit()
elif len(args) > 2 and args[0] == '-i':
ignore_pylibs = True
## Make sure to strip off the -i or --ignore-python-libs option if it exists
args = args[1:]
## Reset system args so that the subsequently exec'd file can read from sys.argv
sys.argv = args
sys.path[0] = os.path.dirname(args[0])
cwd = os.getcwd()
start(ignore_pylibs) # START code coverage
import __main__
try:
execfile(args[0], __main__.__dict__)
finally:
stop() # STOP code coverage
write_coverage(os.path.join(cwd, '.figleaf'))