Record coverage using debuggable-module (#2701)

This commit is contained in:
Joe Ranweiler 2022-12-15 15:20:26 -08:00 committed by GitHub
parent 054910e39b
commit ff923d28e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 1410 additions and 0 deletions

17
src/agent/Cargo.lock generated
View File

@ -555,6 +555,23 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "coverage"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 4.0.26",
"debuggable-module",
"debugger",
"iced-x86",
"log",
"pete",
"procfs",
"regex",
"symbolic 10.1.4",
"thiserror",
]
[[package]]
name = "coverage-legacy"
version = "0.1.0"

View File

@ -1,6 +1,7 @@
[workspace]
members = [
"atexit",
"coverage",
"coverage-legacy",
"debuggable-module",
"debugger",

View File

@ -0,0 +1,26 @@
[package]
name = "coverage"
version = "0.1.0"
edition = "2021"
license = "MIT"
[dependencies]
anyhow = "1.0"
debuggable-module = { path = "../debuggable-module" }
iced-x86 = "1.17"
log = "0.4.17"
regex = "1.0"
symbolic = { version = "10.1", features = ["debuginfo", "demangle", "symcache"] }
thiserror = "1.0"
[target.'cfg(target_os = "windows")'.dependencies]
debugger = { path = "../debugger" }
[target.'cfg(target_os = "linux")'.dependencies]
pete = "0.9"
# For procfs, opt out of the `chrono` feature; it pulls in an old version
# of `time`. We do not use the methods that the `chrono` feature enables.
procfs = { version = "0.12", default-features = false, features=["flate2"] }
[dev-dependencies]
clap = { version = "4.0", features = ["derive"] }

View File

@ -0,0 +1,65 @@
use std::process::Command;
use std::time::Duration;
use anyhow::Result;
use clap::Parser;
use coverage::allowlist::{AllowList, TargetAllowList};
use coverage::binary::BinaryCoverage;
// Command-line arguments for the coverage recording example.
//
// Plain `//` comments are used deliberately here: `///` doc comments on a
// `clap` derive struct are picked up as help text and would change the
// program's `--help` output.
#[derive(Parser, Debug)]
struct Args {
    // Optional path to a module allowlist file, loaded with `AllowList::load`.
    #[arg(long)]
    module_allowlist: Option<String>,

    // Optional path to a source file allowlist file, loaded with `AllowList::load`.
    #[arg(long)]
    source_allowlist: Option<String>,

    // Optional recording timeout. `main` interprets the value as milliseconds.
    #[arg(short, long)]
    timeout: Option<u64>,

    // Target command line: the first element is the program to run, any
    // remaining elements are passed to it as arguments.
    command: Vec<String>,
}
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5);
fn main() -> Result<()> {
let args = Args::parse();
let timeout = args
.timeout
.map(Duration::from_millis)
.unwrap_or(DEFAULT_TIMEOUT);
let mut cmd = Command::new(&args.command[0]);
if args.command.len() > 1 {
cmd.args(&args.command[1..]);
}
let mut allowlist = TargetAllowList::default();
if let Some(path) = &args.module_allowlist {
allowlist.modules = AllowList::load(path)?;
}
if let Some(path) = &args.source_allowlist {
allowlist.source_files = AllowList::load(path)?;
}
let coverage = coverage::record::record(cmd, timeout, allowlist)?;
dump_modoff(coverage)?;
Ok(())
}
/// Prints one `<module base name>+<hex offset>` line to stdout for every
/// coverage site in `coverage` whose count reports it as reached.
fn dump_modoff(coverage: BinaryCoverage) -> Result<()> {
    for (module, module_coverage) in &coverage.modules {
        let reached_sites = module_coverage
            .as_ref()
            .iter()
            .filter(|(_, count)| count.reached());

        for (offset, _) in reached_sites {
            println!("{}+{offset:x}", module.base_name());
        }
    }

    Ok(())
}

View File

@ -0,0 +1,156 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use anyhow::Result;
use regex::{Regex, RegexSet};
use std::path::Path;
/// The set of allowlists that select which parts of a target are recorded.
#[derive(Clone, Debug, Default)]
pub struct TargetAllowList {
    /// Rules matched against function names.
    pub functions: AllowList,

    /// Rules matched against module paths.
    pub modules: AllowList,

    /// Rules matched against source file paths.
    pub source_files: AllowList,
}

impl TargetAllowList {
    /// Creates a target allowlist from module and source file rules.
    ///
    /// The function allowlist is set to the default, which allows everything.
    pub fn new(modules: AllowList, source_files: AllowList) -> Self {
        Self {
            functions: AllowList::default(),
            modules,
            source_files,
        }
    }
}
#[derive(Clone, Debug)]
pub struct AllowList {
allow: RegexSet,
deny: RegexSet,
}
impl AllowList {
pub fn new(allow: RegexSet, deny: RegexSet) -> Self {
Self { allow, deny }
}
pub fn load(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref();
let text = std::fs::read_to_string(path)?;
Self::parse(&text)
}
pub fn parse(text: &str) -> Result<Self> {
use std::io::{BufRead, BufReader};
let reader = BufReader::new(text.as_bytes());
let mut allow = vec![];
let mut deny = vec![];
// We could just collect and pass to the `RegexSet` ctor.
//
// Instead, check each rule individually for diagnostic purposes.
for (index, line) in reader.lines().enumerate() {
let line = line?;
match AllowListLine::parse(&line) {
Ok(valid) => {
use AllowListLine::*;
match valid {
Blank | Comment => {
// Ignore.
}
Allow(re) => {
allow.push(re);
}
Deny(re) => {
deny.push(re);
}
}
}
Err(err) => {
// Ignore invalid lines, but warn.
let line_number = index + 1;
warn!("error at line {}: {}", line_number, err);
}
}
}
let allow = RegexSet::new(allow.iter().map(|re| re.as_str()))?;
let deny = RegexSet::new(deny.iter().map(|re| re.as_str()))?;
let allowlist = AllowList::new(allow, deny);
Ok(allowlist)
}
pub fn is_allowed(&self, path: impl AsRef<str>) -> bool {
let path = path.as_ref();
// Allowed if rule-allowed but not excluded by a negative (deny) rule.
self.allow.is_match(path) && !self.deny.is_match(path)
}
}
impl Default for AllowList {
    /// The default allowlist allows every path and denies nothing.
    fn default() -> Self {
        // `.*` is a valid constant expression, so compiling it cannot fail.
        let allow_everything = RegexSet::new([".*"]).unwrap();

        AllowList::new(allow_everything, RegexSet::empty())
    }
}
/// The result of parsing a single line of an allowlist file.
pub enum AllowListLine {
    /// The line was empty or contained only whitespace.
    Blank,

    /// The line was a `#` comment.
    Comment,

    /// The line was an allow rule, compiled to a regex.
    Allow(Regex),

    /// The line was a `! <rule>` deny rule, compiled to a regex.
    Deny(Regex),
}

impl AllowListLine {
    /// Classifies and compiles a single allowlist line.
    ///
    /// Leading and trailing whitespace is ignored. Anything that is not blank,
    /// a comment, or a deny rule is interpreted as an allow rule.
    ///
    /// # Errors
    ///
    /// Returns an error if the rule cannot be compiled to a regex.
    pub fn parse(line: &str) -> Result<Self> {
        let line = line.trim();

        // Allow and ignore blank lines.
        if line.is_empty() {
            return Ok(Self::Blank);
        }

        // Support comments of the form `# <comment>`.
        //
        // Because the line has been trimmed, an empty comment (`#` or `# `)
        // arrives here as a bare `#`. It must be recognized as a comment too,
        // or it would silently turn into an allow rule for the literal `#`.
        if line == "#" || line.starts_with("# ") {
            return Ok(Self::Comment);
        }

        // Deny rules are of the form `! <rule>`.
        if let Some(expr) = line.strip_prefix("! ") {
            let re = glob_to_regex(expr)?;
            return Ok(Self::Deny(re));
        }

        // Try to interpret as allow rule.
        let re = glob_to_regex(line)?;
        Ok(Self::Allow(re))
    }
}
#[allow(clippy::single_char_pattern)]
fn glob_to_regex(expr: &str) -> Result<Regex> {
// Don't make users escape Windows path separators.
let expr = expr.replace(r"\", r"\\");
// Translate glob wildcards into quantified regexes.
let expr = expr.replace("*", ".*");
// Anchor to line start and end.
let expr = format!("^{expr}$");
Ok(Regex::new(&expr)?)
}
#[cfg(test)]
mod tests;

View File

@ -0,0 +1,3 @@
a/*
! a/c
# c

View File

@ -0,0 +1,3 @@
a/*
! a/c
c

View File

@ -0,0 +1 @@
*

View File

@ -0,0 +1,4 @@
a
a/b
b
c

View File

@ -0,0 +1,2 @@
a
b

View File

@ -0,0 +1,101 @@
use anyhow::Result;
use super::AllowList;
#[test]
fn test_default() -> Result<()> {
    let allowlist = AllowList::default();

    // The default allowlist admits every path.
    for path in ["a", "a/b", "b", "c"] {
        assert!(allowlist.is_allowed(path));
    }

    Ok(())
}
#[test]
fn test_empty() -> Result<()> {
    let allowlist = AllowList::parse(include_str!("test-data/empty.txt"))?;

    // With no allow rules at all, every path is excluded.
    for path in ["a", "a/b", "b", "c"] {
        assert!(!allowlist.is_allowed(path));
    }

    Ok(())
}
#[test]
fn test_allow_some() -> Result<()> {
    let allowlist = AllowList::parse(include_str!("test-data/allow-some.txt"))?;

    // Only the paths that are listed verbatim are allowed.
    let expectations = [("a", true), ("a/b", false), ("b", true), ("c", false)];

    for (path, allowed) in expectations {
        assert!(allowlist.is_allowed(path) == allowed);
    }

    Ok(())
}
#[test]
fn test_allow_all() -> Result<()> {
    let allowlist = AllowList::parse(include_str!("test-data/allow-all.txt"))?;

    // Every probed path is listed explicitly in the fixture.
    for path in ["a", "a/b", "b", "c"] {
        assert!(allowlist.is_allowed(path));
    }

    Ok(())
}
#[test]
fn test_allow_all_glob() -> Result<()> {
    let allowlist = AllowList::parse(include_str!("test-data/allow-all-glob.txt"))?;

    // A lone `*` rule matches every path.
    for path in ["a", "a/b", "b", "c"] {
        assert!(allowlist.is_allowed(path));
    }

    Ok(())
}
#[test]
fn test_allow_glob_except() -> Result<()> {
    let allowlist = AllowList::parse(include_str!("test-data/allow-all-glob-except.txt"))?;

    // `a/*` is allowed except for the denied `a/c`; `c` is allowed by its own rule.
    let expectations = [
        ("a", false),
        ("a/b", true),
        ("a/c", false),
        ("a/d", true),
        ("b", false),
        ("c", true),
    ];

    for (path, allowed) in expectations {
        assert!(allowlist.is_allowed(path) == allowed);
    }

    Ok(())
}
#[test]
fn test_allow_glob_except_commented() -> Result<()> {
    let allowlist =
        AllowList::parse(include_str!("test-data/allow-all-glob-except-commented.txt"))?;

    // Same rules as the "except" fixture, but the final line is `# c`. That
    // line is a comment, not a rule, so `c` is no longer allowed.
    let expectations = [
        ("a", false),
        ("a/b", true),
        ("a/c", false),
        ("a/d", true),
        ("b", false),
        ("c", false),
    ];

    for (path, allowed) in expectations {
        assert!(allowlist.is_allowed(path) == allowed);
    }

    Ok(())
}

View File

@ -0,0 +1,95 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::collections::{BTreeMap, BTreeSet};
use anyhow::{bail, Result};
use debuggable_module::{block, path::FilePath, Module, Offset};
use symbolic::debuginfo::Object;
use symbolic::symcache::{SymCache, SymCacheConverter};
use crate::allowlist::TargetAllowList;
/// Binary (offset-level) coverage for a set of modules, keyed by module path.
#[derive(Clone, Debug, Default)]
pub struct BinaryCoverage {
    /// Per-module coverage, keyed by the path of the module.
    pub modules: BTreeMap<FilePath, ModuleBinaryCoverage>,
}
/// Hit counts for the known coverage sites of a single module.
#[derive(Clone, Debug, Default)]
pub struct ModuleBinaryCoverage {
    /// Hit count for each known coverage site, keyed by module-relative offset.
    pub offsets: BTreeMap<Offset, Count>,
}

impl ModuleBinaryCoverage {
    /// Records one more hit for the coverage site at `offset`.
    ///
    /// # Errors
    ///
    /// Fails if `offset` is not a known coverage site of this module.
    pub fn increment(&mut self, offset: Offset) -> Result<()> {
        match self.offsets.get_mut(&offset) {
            Some(count) => {
                count.increment();
                Ok(())
            }
            None => bail!("unknown coverage offset: {offset:x}"),
        }
    }
}
/// Number of times a coverage site was hit.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Count(pub u32);

impl Count {
    /// Adds one hit, saturating at `u32::MAX` instead of overflowing.
    pub fn increment(&mut self) {
        let Count(hits) = self;
        *hits = hits.saturating_add(1);
    }

    /// Returns `true` if the site was hit at least once.
    pub fn reached(&self) -> bool {
        self.0 != 0
    }
}
/// Computes the coverage sites of `module` that are admitted by `allowlist`.
///
/// A function contributes sites only if its name passes the function
/// allowlist and the source file resolved for its entry offset passes the
/// source file allowlist. For each such function, the offset of every block
/// found by sweeping the function's region becomes a site with a zero count.
///
/// # Errors
///
/// Fails if the module's debug info cannot be read, if the symcache cannot be
/// built or parsed, or if sweeping a function region fails.
pub fn find_coverage_sites<'data>(
    module: &dyn Module<'data>,
    allowlist: &TargetAllowList,
) -> Result<ModuleBinaryCoverage> {
    let debuginfo = module.debuginfo()?;

    // Build an in-memory symcache from the executable and its debug info, so
    // function offsets can be resolved to source locations.
    let mut symcache = vec![];
    let mut converter = SymCacheConverter::new();
    let exe = Object::parse(module.executable_data())?;
    converter.process_object(&exe)?;
    let di = Object::parse(module.debuginfo_data())?;
    converter.process_object(&di)?;
    converter.serialize(&mut std::io::Cursor::new(&mut symcache))?;
    let symcache = SymCache::parse(&symcache)?;

    let mut sites = BTreeSet::new();

    for function in debuginfo.functions() {
        if !allowlist.functions.is_allowed(&function.name) {
            continue;
        }

        // Only the first location resolved for the function entry is consulted.
        let location = match symcache.lookup(function.offset.0).next() {
            Some(location) => location,
            None => continue,
        };

        let file = match location.file() {
            Some(file) => file,
            None => continue,
        };

        let path = file.full_path();
        if !allowlist.source_files.is_allowed(&path) {
            continue;
        }

        let blocks = block::sweep_region(module, &debuginfo, function.offset, function.size)?;
        sites.extend(blocks.iter().map(|b| b.offset));
    }

    // Every site starts out unreached.
    let mut coverage = ModuleBinaryCoverage::default();
    for site in sites {
        coverage.offsets.insert(site, Count(0));
    }

    Ok(coverage)
}
/// Exposes the underlying offset-to-count map by reference.
impl AsRef<BTreeMap<Offset, Count>> for ModuleBinaryCoverage {
    fn as_ref(&self) -> &BTreeMap<Offset, Count> {
        &self.offsets
    }
}

View File

@ -0,0 +1,17 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#[macro_use]
extern crate log;
pub mod allowlist;
pub mod binary;
pub mod record;
pub mod source;
mod timer;
#[doc(inline)]
pub use allowlist::{AllowList, TargetAllowList};
#[doc(inline)]
pub use record::record;

View File

@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#[cfg(target_os = "linux")]
pub mod linux;
#[cfg(target_os = "windows")]
pub mod windows;
#[cfg(target_os = "linux")]
pub use crate::record::linux::record;
#[cfg(target_os = "windows")]
pub use crate::record::windows::record;

View File

@ -0,0 +1,131 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::collections::BTreeMap;
use std::process::Command;
use std::time::Duration;
use anyhow::{bail, Result};
use debuggable_module::linux::LinuxModule;
use debuggable_module::load_module::LoadModule;
use debuggable_module::loader::Loader;
use debuggable_module::path::FilePath;
use debuggable_module::Address;
use pete::Tracee;
pub mod debugger;
use debugger::{DebugEventHandler, Debugger, DebuggerContext, ModuleImage};
use crate::allowlist::TargetAllowList;
use crate::binary::{self, BinaryCoverage};
/// Runs `cmd` under the Linux debugger and records its binary coverage.
///
/// When `allowlist` converts to `None`, the default target allowlist is used.
/// The whole recording session runs under `crate::timer::timed` with the
/// given `timeout`.
///
/// # Errors
///
/// Fails if the timer reports an error (including the timeout elapsing) or if
/// the debugger session itself fails.
pub fn record(
    cmd: Command,
    timeout: Duration,
    allowlist: impl Into<Option<TargetAllowList>>,
) -> Result<BinaryCoverage> {
    let loader = Loader::new();
    let allowlist = allowlist.into().unwrap_or_default();

    // The session owns the loader, so the recorder may borrow from it for as
    // long as the session runs.
    let session = move || -> Result<BinaryCoverage> {
        let mut recorder = LinuxRecorder::new(&loader, allowlist);
        Debugger::new(&mut recorder).run(cmd)?;

        Ok(recorder.coverage)
    };

    // The outer `?` handles the timer's own error; the inner result of the
    // session is returned as-is.
    crate::timer::timed(timeout, session)?
}
/// Debug event handler that records binary coverage on Linux.
pub struct LinuxRecorder<'data> {
    /// Rules selecting the modules, functions and source files to record.
    allowlist: TargetAllowList,

    /// Coverage recorded so far, keyed by module path.
    coverage: BinaryCoverage,

    /// Loader used to load debuggable modules.
    loader: &'data Loader,

    /// Modules that were successfully loaded, keyed by path.
    modules: BTreeMap<FilePath, LinuxModule<'data>>,
}

impl<'data> LinuxRecorder<'data> {
    /// Creates a recorder with empty coverage and no loaded modules.
    pub fn new(loader: &'data Loader, allowlist: TargetAllowList) -> Self {
        Self {
            allowlist,
            coverage: BinaryCoverage::default(),
            loader,
            modules: BTreeMap::new(),
        }
    }

    /// Attributes a breakpoint hit at the tracee's current `rip` to the module
    /// image that contains it.
    fn do_on_breakpoint(
        &mut self,
        context: &mut DebuggerContext,
        tracee: &mut Tracee,
    ) -> Result<()> {
        let regs = tracee.registers()?;
        let addr = Address(regs.rip);

        let image = match context.find_image_for_addr(addr) {
            Some(image) => image,
            None => bail!("no image for addr: {addr:x}"),
        };

        let coverage = match self.coverage.modules.get_mut(image.path()) {
            Some(coverage) => coverage,
            None => bail!("coverage not initialized for module {}", image.path()),
        };

        let offset = addr.offset_from(image.base())?;
        coverage.increment(offset)?;

        Ok(())
    }

    /// Sets a breakpoint on every coverage site of a newly-loaded module.
    ///
    /// Modules that are rejected by the module allowlist, or that cannot be
    /// loaded as debuggable modules, are skipped without error.
    fn do_on_module_load(
        &mut self,
        context: &mut DebuggerContext,
        tracee: &mut Tracee,
        image: &ModuleImage,
    ) -> Result<()> {
        let path = image.path();

        info!("module load: {}", path);

        if !self.allowlist.modules.is_allowed(path) {
            debug!("not inserting denylisted module: {path}");
            return Ok(());
        }

        let module = match LinuxModule::load(self.loader, path.clone()) {
            Ok(module) => module,
            Err(_) => {
                debug!("skipping undebuggable module: {path}");
                return Ok(());
            }
        };

        let coverage = binary::find_coverage_sites(&module, &self.allowlist)?;

        // Coverage sites are module-relative; translate each one to an address
        // in the tracee using the image base.
        for &offset in coverage.as_ref().keys() {
            let addr = image.base().offset_by(offset)?;
            context.breakpoints.set(tracee, addr)?;
        }

        self.coverage.modules.insert(path.clone(), coverage);
        self.modules.insert(path.clone(), module);

        Ok(())
    }
}
/// Forwards debugger events to the recorder's private handlers.
impl<'data> DebugEventHandler for LinuxRecorder<'data> {
    // Delegates to `do_on_breakpoint`.
    fn on_breakpoint(&mut self, context: &mut DebuggerContext, tracee: &mut Tracee) -> Result<()> {
        self.do_on_breakpoint(context, tracee)
    }

    // Delegates to `do_on_module_load`.
    fn on_module_load(
        &mut self,
        context: &mut DebuggerContext,
        tracee: &mut Tracee,
        image: &ModuleImage,
    ) -> Result<()> {
        self.do_on_module_load(context, tracee, image)
    }
}

View File

@ -0,0 +1,370 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::collections::BTreeMap;
use std::process::Command;
use anyhow::{bail, format_err, Result};
use debuggable_module::path::FilePath;
use debuggable_module::Address;
use pete::{Ptracer, Restart, Signal, Stop, Tracee};
use procfs::process::{MMapPath, MemoryMap, Process};
/// Callbacks invoked by [`Debugger`] while it drives a tracee.
pub trait DebugEventHandler {
    /// Called after a registered breakpoint was hit and cleared, with the
    /// tracee's registers already rewound to the breakpoint address.
    fn on_breakpoint(&mut self, dbg: &mut DebuggerContext, tracee: &mut Tracee) -> Result<()>;

    /// Called once for every module image that is newly observed in the
    /// tracee's memory mappings.
    fn on_module_load(
        &mut self,
        db: &mut DebuggerContext,
        tracee: &mut Tracee,
        image: &ModuleImage,
    ) -> Result<()>;
}
/// A minimal ptrace-based debugger that reports breakpoint and module load
/// events to a [`DebugEventHandler`].
pub struct Debugger<'eh> {
    // Breakpoints, known module images, and the underlying tracer.
    context: DebuggerContext,

    // Receiver of breakpoint and module load events.
    event_handler: &'eh mut dyn DebugEventHandler,
}

impl<'eh> Debugger<'eh> {
    /// Creates a debugger with a fresh context that reports to `event_handler`.
    pub fn new(event_handler: &'eh mut dyn DebugEventHandler) -> Self {
        let context = DebuggerContext::new();

        Self {
            context,
            event_handler,
        }
    }

    /// Spawns `cmd` as a tracee and processes its stops until tracing ends.
    ///
    /// If processing fails, the child is killed (best-effort) and the error is
    /// returned.
    pub fn run(mut self, cmd: Command) -> Result<()> {
        let mut child = self.context.tracer.spawn(cmd)?;

        if let Err(err) = self.wait_on_stops() {
            // Ignore error if child already exited.
            let _ = child.kill();

            return Err(err);
        }

        Ok(())
    }

    /// Main debugger loop: handles each tracee stop, then restarts the tracee.
    fn wait_on_stops(mut self) -> Result<()> {
        use pete::ptracer::Options;

        // Continue the tracee process until the return from its initial `execve()`.
        let mut tracee = continue_to_init_execve(&mut self.context.tracer)?;

        // Do not follow forks.
        //
        // After this, we assume that any new tracee is a thread in the same
        // group as the root tracee.
        let mut options = Options::all();
        options.remove(Options::PTRACE_O_TRACEFORK);
        options.remove(Options::PTRACE_O_TRACEVFORK);
        options.remove(Options::PTRACE_O_TRACEEXEC);
        tracee.set_options(options)?;

        // Initialize index of mapped modules now that we have a PID to query.
        self.context.images = Some(Images::new(tracee.pid.as_raw()));
        self.update_images(&mut tracee)?;

        // Restart tracee and enter the main debugger loop.
        self.context.tracer.restart(tracee, Restart::Syscall)?;

        while let Some(mut tracee) = self.context.tracer.wait()? {
            match tracee.stop {
                Stop::SyscallEnter => trace!("syscall-enter: {:?}", tracee.stop),
                Stop::SyscallExit => {
                    // Re-scan the memory mappings after every syscall, since a
                    // syscall may have changed them.
                    self.update_images(&mut tracee)?;
                }
                Stop::SignalDelivery {
                    signal: Signal::SIGTRAP,
                } => {
                    self.restore_and_call_if_breakpoint(&mut tracee)?;
                }
                Stop::Clone { new: pid } => {
                    // Only seen when the `CLONE_VM` flag is set, as of Linux 4.15.
                    info!("new thread: {}", pid);
                }
                _ => {
                    debug!("stop: {:?}", tracee.stop);
                }
            }

            // A failure to restart is logged, not propagated; the loop goes on
            // to wait for the next stop.
            if let Err(err) = self.context.tracer.restart(tracee, Restart::Syscall) {
                error!("unable to restart tracee: {}", err);
            }
        }

        Ok(())
    }

    /// Handles a `SIGTRAP` delivery: if it was caused by one of our software
    /// breakpoints, restores the original instruction, rewinds the program
    /// counter, and notifies the event handler.
    fn restore_and_call_if_breakpoint(&mut self, tracee: &mut Tracee) -> Result<()> {
        let mut regs = tracee.registers()?;

        // Compute what the last PC would have been _if_ we stopped due to a soft breakpoint.
        //
        // If we don't have a registered breakpoint, then we will not use this value.
        let pc = Address(regs.rip.saturating_sub(1));

        if self.context.breakpoints.clear(tracee, pc)? {
            // We restored the original, `int3`-clobbered instruction in `clear()`. Now
            // set the tracee's registers to execute it on restart. Do this _before_ the
            // callback to simulate a hardware breakpoint.
            regs.rip = pc.0;
            tracee.set_registers(regs)?;

            self.event_handler
                .on_breakpoint(&mut self.context, tracee)?;
        } else {
            warn!("no registered breakpoint for SIGTRAP delivery at {pc:x}");

            // We didn't fix up a registered soft breakpoint, so we have no reason to
            // re-execute the instruction at the last PC. Leave the tracee registers alone.
        }

        Ok(())
    }

    /// Refreshes the index of mapped module images and reports every newly
    /// loaded image to the event handler.
    ///
    /// Fails if the image index has not been initialized yet.
    fn update_images(&mut self, tracee: &mut Tracee) -> Result<()> {
        let images = self
            .context
            .images
            .as_mut()
            .ok_or_else(|| format_err!("internal error: recorder images not initialized"))?;

        let events = images.update()?;

        // Only load events are reported; unloaded images are not acted upon here.
        for (_base, image) in &events.loaded {
            self.event_handler
                .on_module_load(&mut self.context, tracee, image)?;
        }

        Ok(())
    }
}
/// Mutable debugger state that is shared with event handlers.
pub struct DebuggerContext {
    /// Software breakpoints currently written into the tracee.
    pub breakpoints: Breakpoints,

    /// Index of mapped module images; `None` until it has been initialized.
    pub images: Option<Images>,

    /// The underlying ptrace driver.
    pub tracer: Ptracer,
}

impl DebuggerContext {
    /// Creates a context with no breakpoints, no image index, and a new tracer.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {
            breakpoints: Breakpoints::default(),
            images: None,
            tracer: Ptracer::new(),
        }
    }

    /// Returns the module image that contains `addr`, if the image index is
    /// initialized and such an image is known.
    pub fn find_image_for_addr(&self, addr: Address) -> Option<&ModuleImage> {
        self.images
            .as_ref()
            .and_then(|images| images.find_image_for_addr(addr))
    }
}
/// Executable memory-mapped files for a process.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Images {
    // Known module images, keyed by base address.
    mapped: BTreeMap<Address, ModuleImage>,

    // ID of the process whose memory mappings are queried.
    pid: i32,
}

impl Images {
    /// Creates an empty image index for the process with the given `pid`.
    pub fn new(pid: i32) -> Self {
        let mapped = BTreeMap::default();

        Self { mapped, pid }
    }

    /// Iterates over the known module images together with their base addresses.
    pub fn mapped(&self) -> impl Iterator<Item = (Address, &ModuleImage)> {
        self.mapped.iter().map(|(va, i)| (*va, i))
    }

    /// Re-reads the process memory mappings, replaces the index with the
    /// module images found, and returns the difference to the previous index.
    ///
    /// Consecutive mappings that share a pathname are grouped into a single
    /// candidate image. Groups that do not form a valid [`ModuleImage`] (for
    /// example, groups that are not file-backed or have no executable mapping)
    /// are ignored.
    ///
    /// # Errors
    ///
    /// Fails if the process or its memory mappings cannot be queried.
    pub fn update(&mut self) -> Result<LoadEvents> {
        let proc = Process::new(self.pid)?;

        let mut new = BTreeMap::new();

        let mut group: Vec<MemoryMap> = vec![];

        for map in proc.maps()? {
            // A mapping whose pathname differs from the previous one starts a
            // new group. (Testing for equality here would split the mappings of
            // one file apart and lump mappings of different files together.)
            let starts_new_group = group
                .last()
                .map_or(false, |last| last.pathname != map.pathname);

            if starts_new_group {
                // Consume the current group, and track any new module image.
                // Taking the group also resets it for the next one.
                if let Ok(image) = ModuleImage::new(std::mem::take(&mut group)) {
                    new.insert(image.base(), image);
                }
            }

            group.push(map);
        }

        // The last group is never followed by a mapping with a different
        // pathname, so it must be consumed explicitly. An empty or invalid
        // group is rejected by `ModuleImage::new()` and ignored.
        if let Ok(image) = ModuleImage::new(group) {
            new.insert(image.base(), image);
        }

        let events = LoadEvents::new(&self.mapped, &new);

        self.mapped = new;

        Ok(events)
    }

    /// Returns the known module image that contains `addr`, if any.
    pub fn find_image_for_addr(&self, addr: Address) -> Option<&ModuleImage> {
        self.mapped.values().find(|image| image.contains(&addr))
    }
}
/// A group of memory mappings that share one file-backed pathname, at least
/// one of which is executable.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ModuleImage {
    base: Address,
    maps: Vec<MemoryMap>,
    path: FilePath,
}

impl ModuleImage {
    /// Builds a module image from memory mappings with a common file-backed
    /// pathname. The mappings are sorted by address before use.
    ///
    /// # Errors
    ///
    /// Fails if `maps` is empty, contains no executable mapping, is not
    /// file-backed, or mixes different pathnames.
    pub fn new(mut maps: Vec<MemoryMap>) -> Result<Self> {
        maps.sort_by_key(|m| m.address);

        let first = match maps.first() {
            Some(first) => first,
            None => bail!("no mapping for module image"),
        };

        if maps.iter().all(|m| !m.perms.contains('x')) {
            bail!("no executable mapping for module image");
        }

        let path = match &first.pathname {
            MMapPath::Path(path) => FilePath::new(path.to_string_lossy())?,
            _ => bail!("module image mappings must be file-backed"),
        };

        if maps.iter().any(|m| m.pathname != first.pathname) {
            bail!("module image mapping not file-backed");
        }

        // The mappings are sorted, so the first one starts at the image base.
        let base = Address(first.address.0);

        Ok(ModuleImage { base, maps, path })
    }

    /// Path of the file that backs this image.
    pub fn path(&self) -> &FilePath {
        &self.path
    }

    /// Start address of the lowest mapping of this image.
    pub fn base(&self) -> Address {
        self.base
    }

    /// Returns `true` if `addr` lies within any mapping of this image. The end
    /// address of a mapping is exclusive.
    pub fn contains(&self, addr: &Address) -> bool {
        self.maps.iter().any(|map| {
            let range = Address(map.address.0)..Address(map.address.1);
            range.contains(addr)
        })
    }
}
/// The difference between two snapshots of mapped module images.
pub struct LoadEvents {
    /// Images present in the new snapshot but not in the old one.
    pub loaded: Vec<(Address, ModuleImage)>,

    /// Images present in the old snapshot but not in the new one.
    pub unloaded: Vec<(Address, ModuleImage)>,
}

impl LoadEvents {
    /// Computes which images were loaded and unloaded between `old` and `new`.
    ///
    /// Two images are considered the same when both their base address and
    /// their path are equal.
    pub fn new(old: &BTreeMap<Address, ModuleImage>, new: &BTreeMap<Address, ModuleImage>) -> Self {
        Self {
            // New not in old.
            loaded: Self::missing_from(new, old),
            // Old not in new.
            unloaded: Self::missing_from(old, new),
        }
    }

    // Collects the entries of `source` that have no entry in `other` with the
    // same base address and path. Base addresses are the map keys, so a keyed
    // lookup finds the only possible counterpart.
    fn missing_from(
        source: &BTreeMap<Address, ModuleImage>,
        other: &BTreeMap<Address, ModuleImage>,
    ) -> Vec<(Address, ModuleImage)> {
        source
            .iter()
            .filter(|(base, image)| {
                other
                    .get(*base)
                    .map_or(true, |counterpart| counterpart.path() != image.path())
            })
            .map(|(base, image)| (*base, image.clone()))
            .collect()
    }
}
/// Software breakpoints written into a tracee, with the bytes they replaced.
#[derive(Clone, Debug, Default)]
pub struct Breakpoints {
    // Original instruction byte for each address that holds a breakpoint.
    saved: BTreeMap<Address, u8>,
}

impl Breakpoints {
    /// Writes a breakpoint (`0xcc`) at `addr` and remembers the byte it replaced.
    ///
    /// Does nothing if a breakpoint is already registered at `addr`.
    pub fn set(&mut self, tracee: &mut Tracee, addr: Address) -> Result<()> {
        use std::collections::btree_map::Entry;

        match self.saved.entry(addr) {
            // The breakpoint exists already. Reading the byte again would make
            // us conclude that the saved instruction byte was `0xcc`.
            Entry::Occupied(_) => {}
            Entry::Vacant(slot) => {
                let mut original = [0u8];
                tracee.read_memory_mut(addr.0, &mut original)?;
                slot.insert(original[0]);
                tracee.write_memory(addr.0, &[0xcc])?;
            }
        }

        Ok(())
    }

    /// Removes the breakpoint at `addr` and restores the original byte.
    ///
    /// Returns `true` if a breakpoint was registered at `addr`, else `false`.
    pub fn clear(&mut self, tracee: &mut Tracee, addr: Address) -> Result<bool> {
        match self.saved.remove(&addr) {
            Some(original) => {
                tracee.write_memory(addr.0, &[original])?;
                Ok(true)
            }
            None => Ok(false),
        }
    }
}
/// Continues the tracee until its first syscall-exit stop, which is taken to
/// be the return from its initial `execve()`, and returns the stopped tracee.
///
/// # Errors
///
/// Fails if waiting on or restarting the tracee fails, or if tracing ends
/// before a syscall-exit stop is seen.
fn continue_to_init_execve(tracer: &mut Ptracer) -> Result<Tracee> {
    loop {
        let tracee = match tracer.wait()? {
            Some(tracee) => tracee,
            None => bail!("did not see initial execve() in tracee while recording coverage"),
        };

        if matches!(tracee.stop, Stop::SyscallExit) {
            return Ok(tracee);
        }

        tracer.restart(tracee, Restart::Continue)?;
    }
}

View File

@ -0,0 +1,210 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::collections::BTreeMap;
use std::path::Path;
use std::process::Command;
use std::time::Duration;
use anyhow::{anyhow, Result};
use debuggable_module::load_module::LoadModule;
use debuggable_module::loader::Loader;
use debuggable_module::path::FilePath;
use debuggable_module::windows::WindowsModule;
use debuggable_module::Offset;
use debugger::{BreakpointId, BreakpointType, DebugEventHandler, Debugger, ModuleLoadInfo};
use crate::allowlist::TargetAllowList;
use crate::binary::{self, BinaryCoverage};
/// Runs `cmd` under the Windows debugger and records its binary coverage.
///
/// When `allowlist` converts to `None`, the default target allowlist is used.
/// The whole recording session runs under `crate::timer::timed` with the
/// given `timeout`.
///
/// # Errors
///
/// Fails if the timer reports an error (including the timeout elapsing) or if
/// the debugger session itself fails.
pub fn record(
    cmd: Command,
    timeout: Duration,
    allowlist: impl Into<Option<TargetAllowList>>,
) -> Result<BinaryCoverage> {
    let loader = Loader::new();
    let allowlist = allowlist.into().unwrap_or_default();

    // The session owns the loader, so the recorder may borrow from it for as
    // long as the session runs.
    let session = move || -> Result<BinaryCoverage> {
        let mut recorder = WindowsRecorder::new(&loader, allowlist);
        let (mut dbg, _child) = Debugger::init(cmd, &mut recorder)?;
        dbg.run(&mut recorder)?;

        Ok(recorder.coverage)
    };

    // The outer `?` handles the timer's own error; the inner result of the
    // session is returned as-is.
    crate::timer::timed(timeout, session)?
}
/// Debug event handler that records binary coverage on Windows.
pub struct WindowsRecorder<'data> {
    /// Rules selecting the modules, functions and source files to record.
    allowlist: TargetAllowList,

    /// Breakpoints that were registered with the debugger.
    breakpoints: Breakpoints,

    /// Coverage recorded so far, keyed by module path.
    coverage: BinaryCoverage,

    /// Loader used to load debuggable modules.
    loader: &'data Loader,

    /// Modules that were successfully loaded, keyed by path.
    modules: BTreeMap<FilePath, WindowsModule<'data>>,
}

impl<'data> WindowsRecorder<'data> {
    /// Creates a recorder with no breakpoints, empty coverage and no modules.
    pub fn new(loader: &'data Loader, allowlist: TargetAllowList) -> Self {
        Self {
            allowlist,
            breakpoints: Breakpoints::default(),
            coverage: BinaryCoverage::default(),
            loader,
            modules: BTreeMap::new(),
        }
    }

    /// Shared access to the target allowlist.
    pub fn allowlist(&self) -> &TargetAllowList {
        &self.allowlist
    }

    /// Exclusive access to the target allowlist.
    pub fn allowlist_mut(&mut self) -> &mut TargetAllowList {
        &mut self.allowlist
    }

    /// Handles creation of the target process: tries to initialize the symbol
    /// handler (a failure is only logged), then registers the main module.
    fn try_on_create_process(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> {
        // Not necessary for PDB search, but enables use of other `dbghelp` APIs.
        if let Err(err) = dbg.target().maybe_sym_initialize() {
            error!(
                "unable to initialize symbol handler for new process {}: {:?}",
                module.path().display(),
                err,
            );
        }

        self.insert_module(dbg, module)
    }

    /// Handles a DLL load by registering the loaded module.
    fn try_on_load_dll(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> {
        self.insert_module(dbg, module)
    }

    /// Handles a breakpoint hit: forgets the breakpoint and records a hit for
    /// its coverage site.
    fn try_on_breakpoint(&mut self, _dbg: &mut Debugger, id: BreakpointId) -> Result<()> {
        let breakpoint = match self.breakpoints.remove(id) {
            Some(breakpoint) => breakpoint,
            None => return Err(anyhow!("stopped on dangling breakpoint")),
        };

        let coverage = match self.coverage.modules.get_mut(&breakpoint.module) {
            Some(coverage) => coverage,
            None => {
                return Err(anyhow!(
                    "coverage not initialized for module: {}",
                    breakpoint.module
                ))
            }
        };

        coverage.increment(breakpoint.offset)
    }

    /// Asks the debugger to stop debugging.
    fn stop(&self, dbg: &mut Debugger) {
        dbg.quit_debugging();
    }

    /// Sets a breakpoint on every coverage site of a newly-loaded module.
    ///
    /// Modules that are rejected by the module allowlist, or that cannot be
    /// loaded as debuggable modules, are skipped without error.
    fn insert_module(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> {
        let path = FilePath::new(module.path().to_string_lossy())?;

        if !self.allowlist.modules.is_allowed(&path) {
            debug!("not inserting denylisted module: {path}");
            return Ok(());
        }

        let module = match WindowsModule::load(self.loader, path.clone()) {
            Ok(module) => module,
            Err(_) => {
                debug!("skipping undebuggable module: {path}");
                return Ok(());
            }
        };

        let coverage = binary::find_coverage_sites(&module, &self.allowlist)?;

        for &offset in coverage.as_ref().keys() {
            self.breakpoints
                .set(dbg, Breakpoint::new(path.clone(), offset))?;
        }

        self.coverage.modules.insert(path.clone(), coverage);
        self.modules.insert(path, module);

        Ok(())
    }
}
#[derive(Debug, Default)]
struct Breakpoints {
id_to_offset: BTreeMap<BreakpointId, Offset>,
offset_to_breakpoint: BTreeMap<Offset, Breakpoint>,
}
impl Breakpoints {
pub fn set(&mut self, dbg: &mut Debugger, breakpoint: Breakpoint) -> Result<()> {
if self.is_set(&breakpoint) {
return Ok(());
}
self.write(dbg, breakpoint)
}
// Unguarded action that ovewrites both the target process address space and our index
// of known breakpoints. Callers must use `set()`, which avoids redundant breakpoint
// setting.
fn write(&mut self, dbg: &mut Debugger, breakpoint: Breakpoint) -> Result<()> {
// The `debugger` crates tracks loaded modules by base name. If a path or file
// name is used, software breakpoints will not be written.
let name = Path::new(breakpoint.module.base_name());
let id = dbg.new_rva_breakpoint(name, breakpoint.offset.0, BreakpointType::OneTime)?;
self.id_to_offset.insert(id, breakpoint.offset);
self.offset_to_breakpoint
.insert(breakpoint.offset, breakpoint);
Ok(())
}
pub fn is_set(&self, breakpoint: &Breakpoint) -> bool {
self.offset_to_breakpoint.contains_key(&breakpoint.offset)
}
pub fn remove(&mut self, id: BreakpointId) -> Option<Breakpoint> {
let offset = self.id_to_offset.remove(&id)?;
self.offset_to_breakpoint.remove(&offset)
}
}
/// A coverage breakpoint, identified by its module and module-relative offset.
#[derive(Clone, Debug)]
struct Breakpoint {
    // Path of the module that contains the breakpoint.
    module: FilePath,

    // Offset of the breakpoint; passed to the debugger as an RVA.
    offset: Offset,
}

impl Breakpoint {
    /// Creates a breakpoint for `offset` within `module`.
    pub fn new(module: FilePath, offset: Offset) -> Self {
        Self { module, offset }
    }
}
/// Adapts the recorder's fallible handlers to the infallible debugger
/// callbacks: any error is logged as a warning and stops the debugging session.
impl<'data> DebugEventHandler for WindowsRecorder<'data> {
    fn on_create_process(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) {
        match self.try_on_create_process(dbg, module) {
            Ok(()) => {}
            Err(err) => {
                warn!("{err}");
                self.stop(dbg);
            }
        }
    }

    fn on_load_dll(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) {
        match self.try_on_load_dll(dbg, module) {
            Ok(()) => {}
            Err(err) => {
                warn!("{err}");
                self.stop(dbg);
            }
        }
    }

    fn on_breakpoint(&mut self, dbg: &mut Debugger, bp: BreakpointId) {
        match self.try_on_breakpoint(dbg, bp) {
            Ok(()) => {}
            Err(err) => {
                warn!("{err}");
                self.stop(dbg);
            }
        }
    }
}

View File

@ -0,0 +1,157 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::collections::{BTreeMap, BTreeSet};
use anyhow::{bail, Result};
use debuggable_module::block::{sweep_region, Block, Blocks};
use debuggable_module::load_module::LoadModule;
use debuggable_module::loader::Loader;
use debuggable_module::path::FilePath;
use debuggable_module::{Module, Offset};
use crate::binary::BinaryCoverage;
pub use crate::binary::Count;
/// Source (line-level) coverage for a set of source files.
#[derive(Clone, Debug, Default)]
pub struct SourceCoverage {
    /// Per-file coverage, keyed by source file path.
    pub files: BTreeMap<FilePath, FileCoverage>,
}
/// Line-level coverage for a single source file.
#[derive(Clone, Debug, Default)]
pub struct FileCoverage {
    /// Hit count for each line that has coverage data.
    pub lines: BTreeMap<Line, Count>,
}
/// A source line number.
// Must be nonzero.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Line(u32);

impl Line {
    /// Creates a line from its number.
    ///
    /// # Errors
    ///
    /// Fails if `number` is zero.
    pub fn new(number: u32) -> Result<Self> {
        match number {
            0 => bail!("line numbers must be nonzero"),
            nonzero => Ok(Line(nonzero)),
        }
    }

    /// The line number.
    pub fn number(&self) -> u32 {
        let Line(number) = *self;
        number
    }
}

impl From<Line> for u32 {
    /// Extracts the line number.
    fn from(line: Line) -> Self {
        line.number()
    }
}
/// Converts offset-level binary coverage into line-level source coverage.
///
/// For every module in `binary`, the blocks of each function that contains at
/// least one coverage offset are recovered. Each covered block is then
/// "inflated" into its instruction offsets, and every source location resolved
/// for such an instruction receives the block's count. Locations with a line
/// number of zero are skipped.
///
/// When several blocks map to the same line, the line keeps the *smallest*
/// count: if any part of a line was missed, the line counts as missed.
///
/// # Errors
///
/// Fails if a module or its debug info cannot be loaded, if the symcache
/// cannot be built or parsed, or if block recovery or decoding fails.
pub fn binary_to_source_coverage(binary: &BinaryCoverage) -> Result<SourceCoverage> {
    use symbolic::debuginfo::Object;
    use symbolic::symcache::{SymCache, SymCacheConverter};

    let loader = Loader::new();

    let mut source = SourceCoverage::default();

    for (exe_path, coverage) in &binary.modules {
        let module: Box<dyn Module> = Box::load(&loader, exe_path.clone())?;
        let debuginfo = module.debuginfo()?;

        // Build an in-memory symcache from the executable and its debug info,
        // so instruction offsets can be resolved to source locations.
        let mut symcache = vec![];
        let mut converter = SymCacheConverter::new();
        let exe = Object::parse(module.executable_data())?;
        converter.process_object(&exe)?;
        let di = Object::parse(module.debuginfo_data())?;
        converter.process_object(&di)?;
        converter.serialize(&mut std::io::Cursor::new(&mut symcache))?;
        let symcache = SymCache::parse(&symcache)?;

        let mut blocks = Blocks::new();

        for function in debuginfo.functions() {
            for offset in coverage.as_ref().keys() {
                // Recover function blocks if it contains any coverage offset.
                if function.contains(offset) {
                    let function_blocks =
                        sweep_region(&*module, &debuginfo, function.offset, function.size)?;
                    blocks.extend(&function_blocks);
                    break;
                }
            }
        }

        for (offset, count) in coverage.as_ref() {
            // Inflate blocks.
            if let Some(block) = blocks.find(offset) {
                let block_offsets = instruction_offsets(&*module, block)?;

                for offset in block_offsets {
                    for location in symcache.lookup(offset.0) {
                        let line_number = location.line();

                        if line_number == 0 {
                            continue;
                        }

                        if let Some(file) = location.file() {
                            let file_path = FilePath::new(file.full_path())?;

                            // We have a hit.
                            let file_coverage = source.files.entry(file_path).or_default();
                            let line = Line(line_number);

                            // If we miss any part of a line, count it as missed: keep
                            // the minimum of the counts seen for the line. (Taking the
                            // maximum would report a line as covered as soon as any
                            // one of its blocks was reached.)
                            file_coverage
                                .lines
                                .entry(line)
                                .and_modify(|seen| seen.0 = u32::min(seen.0, count.0))
                                .or_insert(*count);
                        }
                    }
                }
            }
        }
    }

    Ok(source)
}
/// Returns the offset of every instruction that can be decoded from `block`.
///
/// Decoding stops at the end of the block's data or at the first invalid
/// instruction, whichever comes first.
///
/// # Errors
///
/// Fails if the block's bytes cannot be read from `module`.
fn instruction_offsets(module: &dyn Module, block: &Block) -> Result<BTreeSet<Offset>> {
    use iced_x86::Decoder;
    let data = module.read(block.offset, block.size)?;

    let mut offsets: BTreeSet<Offset> = BTreeSet::default();

    // Decode with the block offset as the initial IP, so that each decoded
    // instruction reports its own module-relative offset.
    let mut decoder = Decoder::new(64, data, 0);
    decoder.set_ip(block.offset.0);

    while decoder.can_decode() {
        let inst = decoder.decode();

        if inst.is_invalid() {
            break;
        }

        // `ip()` is the address of the instruction that was just decoded (not
        // of the next one), so record it directly. Recording a trailing copy
        // of the previous value would insert the first offset twice and never
        // record the last instruction of the block.
        offsets.insert(Offset(inst.ip()));
    }

    Ok(offsets)
}

View File

@ -0,0 +1,37 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::sync::mpsc;
use std::thread;
use std::time::Duration;
use anyhow::{bail, Result};
/// Runs `function` on a worker thread and waits at most `timeout` for its result.
///
/// The worker thread is not joined or cancelled: if the timeout elapses, the
/// worker keeps running in the background and its eventual result is dropped.
///
/// # Errors
///
/// Fails if `function` does not finish within `timeout`, or if the worker
/// thread terminates without producing a result (for example, by panicking).
pub fn timed<F, T>(timeout: Duration, function: F) -> Result<T>
where
    T: Send + 'static,
    F: FnOnce() -> T + Send + 'static,
{
    let (sender, receiver) = mpsc::channel();

    let _worker = thread::spawn(move || {
        let out = function();

        // If the caller already gave up waiting, the receiver is gone and the
        // send fails. That is expected after a timeout, so it must not panic.
        let _ = sender.send(out);
    });

    // Waiting with a timeout on the receiver needs no separate timer thread,
    // which would otherwise outlive every successful call and then fail to
    // send its timeout notification to the dropped receiver.
    match receiver.recv_timeout(timeout) {
        Ok(out) => Ok(out),
        Err(mpsc::RecvTimeoutError::Timeout) => {
            bail!("function exceeded timeout of {:?}", timeout)
        }
        Err(mpsc::RecvTimeoutError::Disconnected) => {
            bail!("timed function terminated without producing a result")
        }
    }
}