diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index bfc85f5a3..c9ea85061 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -2532,11 +2532,14 @@ dependencies = [ name = "srcview" version = "0.1.0" dependencies = [ + "anyhow", + "env_logger 0.9.0", "log", "nom 7.0.0", "pdb", "regex", "serde", + "structopt", "xml-rs", ] diff --git a/src/agent/srcview/.gitignore b/src/agent/srcview/.gitignore new file mode 100644 index 000000000..8bda2718d --- /dev/null +++ b/src/agent/srcview/.gitignore @@ -0,0 +1 @@ +res/example.pdb diff --git a/src/agent/srcview/Cargo.toml b/src/agent/srcview/Cargo.toml index 8b492a146..2e331668e 100644 --- a/src/agent/srcview/Cargo.toml +++ b/src/agent/srcview/Cargo.toml @@ -16,3 +16,6 @@ pdb = "0.7" regex = "1" serde = { version = "1", features = ["derive"] } xml-rs = "0.8" +anyhow = "1.0" +structopt = "0.3" +env_logger = "0.9" \ No newline at end of file diff --git a/src/agent/srcview/src/bin/srcview.rs b/src/agent/srcview/src/bin/srcview.rs new file mode 100644 index 000000000..b91f3fe3c --- /dev/null +++ b/src/agent/srcview/src/bin/srcview.rs @@ -0,0 +1,178 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use anyhow::{format_err, Context, Result}; +use srcview::{ModOff, Report, SrcLine, SrcView}; +use std::fs; +use std::io::{stdout, Write}; +use std::path::{Path, PathBuf}; +use structopt::StructOpt; + +#[derive(StructOpt, Debug)] +enum Opt { + Srcloc(SrcLocOpt), + PdbPaths(PdbPathsOpt), + Cobertura(CoberturaOpt), + /// Print 3rd-party license information + Licenses, +} + +/// Print the file paths in the provided PDB +#[derive(StructOpt, Debug)] +struct PdbPathsOpt { + pdb_path: PathBuf, +} + +/// Print modoffset file with file and source lines +#[derive(StructOpt, Debug)] +struct SrcLocOpt { + pdb_path: PathBuf, + modoff_path: PathBuf, + #[structopt(long)] + module_name: Option, +} + +/// Generate a Cobertura XML coverage report +/// +/// Example: +/// srcview cobertura ./res/example.pdb res/example.txt +/// --include-regex "E:\\\\1f\\\\coverage\\\\" +/// --filter-regex "E:\\\\1f\\\\coverage\\\\" +/// --module-name example.exe +/// +/// In this example, only files that live in E:\1f\coverage are included and +/// E:\1f\coverage is removed from the filenames in the resulting XML report. +#[derive(StructOpt, Debug)] +struct CoberturaOpt { + pdb_path: PathBuf, + modoff_path: PathBuf, + #[structopt(long)] + module_name: Option, + + /// regular expression that will be applied against the file paths from the + /// srcview + #[structopt(long)] + include_regex: Option, + + /// search and replace regular expression that is applied to all file + /// paths that will appear in the output report + #[structopt(long)] + filter_regex: Option, +} + +fn main() -> Result<()> { + env_logger::init(); + + let opt = Opt::from_args(); + + match opt { + Opt::Srcloc(opts) => srcloc(opts)?, + Opt::PdbPaths(opts) => pdb_paths(opts)?, + Opt::Cobertura(opts) => cobertura(opts)?, + Opt::Licenses => licenses()?, + }; + + Ok(()) +} + +fn licenses() -> Result<()> { + stdout().write_all(include_bytes!("../../../data/licenses.json"))?; + Ok(()) +} + +// In the case the user did not specify the module name of interest, this +// utility function will guess at the module name based on the PDB path name. +// +// This is a last-ditch effort to ensure the coverage report has something +// consumable. +fn add_common_extensions(srcview: &mut SrcView, pdb_path: &Path) -> Result<()> { + let pdb_file_name = pdb_path.file_name().ok_or_else(|| { + format_err!( + "unable to identify file name from path: {}", + pdb_path.display() + ) + })?; + + let stem = Path::new(pdb_file_name) + .file_stem() + .ok_or_else(|| { + format_err!( + "unable to identify file stem from path: {}", + pdb_path.display() + ) + })? + .to_string_lossy(); + + // add module without extension + srcview.insert(&stem, &pdb_path)?; + // add common module extensions + for ext in ["sys", "exe", "dll"] { + srcview.insert(&format!("{}.{}", stem, ext), pdb_path)?; + } + Ok(()) +} + +fn srcloc(opts: SrcLocOpt) -> Result<()> { + let modoff_data = fs::read_to_string(&opts.modoff_path) + .with_context(|| format!("unable to read modoff_path: {}", opts.modoff_path.display()))?; + let modoffs = ModOff::parse(&modoff_data)?; + let mut srcview = SrcView::new(); + + if let Some(module_name) = &opts.module_name { + srcview.insert(module_name, &opts.pdb_path)?; + } else { + add_common_extensions(&mut srcview, &opts.pdb_path)?; + } + + for modoff in &modoffs { + print!(" +{:04x} ", modoff.offset); + match srcview.modoff(modoff) { + Some(srcloc) => println!("{}", srcloc), + None => println!(), + } + } + + Ok(()) +} + +fn pdb_paths(opts: PdbPathsOpt) -> Result<()> { + let mut srcview = SrcView::new(); + srcview.insert(&*opts.pdb_path.to_string_lossy(), &opts.pdb_path)?; + + for path in srcview.paths() { + println!("{}", path.display()); + } + Ok(()) +} + +fn cobertura(opts: CoberturaOpt) -> Result<()> { + // read our modoff file and parse it to a vector + let modoff_data = fs::read_to_string(&opts.modoff_path)?; + let modoffs = ModOff::parse(&modoff_data)?; + + // create our new SrcView and insert our only pdb into it + // we don't know what the modoff module will be, so create a mapping from + // all likely names to the pdb + let mut srcview = SrcView::new(); + + if let Some(module_name) = &opts.module_name { + srcview.insert(module_name, &opts.pdb_path)?; + } else { + add_common_extensions(&mut srcview, &opts.pdb_path)?; + } + + // Convert our ModOffs to SrcLine so we can draw it + let coverage: Vec = modoffs + .into_iter() + .filter_map(|m| srcview.modoff(&m)) + .collect(); + + // Generate our report, filtering on our example path + let r = Report::new(&coverage, &srcview, opts.include_regex.as_deref())?; + + // Format it as cobertura and display it + let formatted = r.cobertura(opts.filter_regex.as_deref())?; + println!("{}", formatted); + + Ok(()) +} diff --git a/src/agent/srcview/src/modoff.rs b/src/agent/srcview/src/modoff.rs index ae8eb18af..777a5e41f 100644 --- a/src/agent/srcview/src/modoff.rs +++ b/src/agent/srcview/src/modoff.rs @@ -149,49 +149,55 @@ impl ModOff { #[cfg(test)] mod tests { use super::*; + use anyhow::Result; #[test] - fn parse_empty() { + fn parse_empty() -> Result<()> { let empty: Vec = Vec::new(); - assert_eq!(empty, ModOff::parse("").unwrap()); + assert_eq!(empty, ModOff::parse("")?); + Ok(()) } #[test] - fn parse_good() { + fn parse_good() -> Result<()> { assert_eq!( vec![ModOff::new("foo.exe", 0x4141)], - ModOff::parse("foo.exe+4141").unwrap() + ModOff::parse("foo.exe+4141")? ); + Ok(()) } #[test] - fn parse_good_multiple_unix() { + fn parse_good_multiple_unix() -> Result<()> { assert_eq!( vec![ ModOff::new("foo.exe", 0x4141), ModOff::new("foo.exe", 0x4242) ], - ModOff::parse("foo.exe+4141\nfoo.exe+4242").unwrap() + ModOff::parse("foo.exe+4141\nfoo.exe+4242")? ); + Ok(()) } #[test] - fn parse_good_multiple_windows() { + fn parse_good_multiple_windows() -> Result<()> { assert_eq!( vec![ ModOff::new("foo.exe", 0x4141), ModOff::new("foo.exe", 0x4242), ], - ModOff::parse("foo.exe+4141\r\nfoo.exe+4242").unwrap() + ModOff::parse("foo.exe+4141\r\nfoo.exe+4242")? ); + Ok(()) } #[test] - fn parse_good_leading_0x() { + fn parse_good_leading_0x() -> Result<()> { assert_eq!( vec![ModOff::new("foo.exe", 0x4141)], - ModOff::parse("foo.exe+0x4141").unwrap() + ModOff::parse("foo.exe+0x4141")? ); + Ok(()) } #[test] diff --git a/src/agent/srcview/src/pdbcache.rs b/src/agent/srcview/src/pdbcache.rs index e8444cb20..1274612d0 100644 --- a/src/agent/srcview/src/pdbcache.rs +++ b/src/agent/srcview/src/pdbcache.rs @@ -5,6 +5,7 @@ use std::collections::BTreeMap; use std::fs::File; use std::path::{Path, PathBuf}; +use anyhow::{format_err, Result}; use log::*; use pdb::{FallibleIterator, SymbolData, PDB}; use serde::{Deserialize, Serialize}; @@ -20,7 +21,7 @@ pub struct PdbCache { } impl PdbCache { - pub fn new>(pdb: P) -> Result> { + pub fn new>(pdb: P) -> Result { let mut offset_to_line: BTreeMap = BTreeMap::new(); let mut symbol_to_lines: BTreeMap> = BTreeMap::new(); let mut path_to_symbols: BTreeMap> = BTreeMap::new(); @@ -50,7 +51,10 @@ impl PdbCache { if let Ok(SymbolData::Procedure(proc)) = symbol.parse() { let mut lines = program.lines_at_offset(proc.offset); while let Some(line_info) = lines.next()? { - let rva = line_info.offset.to_rva(&address_map).expect("invalid rva"); + let rva = line_info + .offset + .to_rva(&address_map) + .ok_or_else(|| format_err!("invalid RVA: {:?}", line_info))?; let file_info = program.get_file_info(line_info.file_index)?; let file_name = file_info.name.to_string_lossy(&string_table)?; diff --git a/src/agent/srcview/src/report.rs b/src/agent/srcview/src/report.rs index 7e087f96d..f08383d13 100644 --- a/src/agent/srcview/src/report.rs +++ b/src/agent/srcview/src/report.rs @@ -6,6 +6,8 @@ use std::fmt; use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; +use anyhow::{format_err, Context, Result}; +use log::warn; use regex::Regex; use xml::writer::{EmitterConfig, XmlEvent}; @@ -79,9 +81,9 @@ impl Report { coverage: &[SrcLine], srcview: &SrcView, include_regex: Option<&str>, - ) -> Result> { + ) -> Result { let include = include_regex.map(|f| Regex::new(f)).transpose()?; - let filecov = Self::compute_filecov(coverage, srcview, &include); + let filecov = Self::compute_filecov(coverage, srcview, &include)?; // should this function take &[ModOff] and perform the conversion itself? @@ -102,19 +104,21 @@ impl Report { coverage: &[SrcLine], srcview: &SrcView, include: &Option, - ) -> BTreeMap { + ) -> Result> { let uniq_cov: BTreeSet = coverage.iter().cloned().collect(); let mut filecov = BTreeMap::new(); for path in srcview.paths() { - if !Self::relevant_path(path, include) { + if !Self::relevant_path(path, include)? { continue; } let path_srclocs: Vec = srcview .path_lines(path) - .unwrap() + .ok_or_else(|| { + format_err!("unable to find path lines in path: {}", path.display()) + })? .map(|line| SrcLine::new(path, line)) .collect(); @@ -135,8 +139,11 @@ impl Report { if let Some(path_symbols) = srcview.path_symbols(path) { for symbol in path_symbols { - let symbol_srclocs: BTreeSet = - srcview.symbol(&symbol).unwrap().cloned().collect(); + let symbol_srclocs: BTreeSet = srcview + .symbol(&symbol) + .ok_or_else(|| format_err!("unable to resolve symbol: {}", symbol))? + .cloned() + .collect(); symbols.insert(symbol, symbol_srclocs); } @@ -152,7 +159,7 @@ impl Report { ); } - filecov + Ok(filecov) } // should only be called from `new`, function to initialize directory coverage and overall @@ -183,10 +190,10 @@ impl Report { // get every file that matches this directory and total it for file in self.filter_files(dir) { - let cov = self.file(&file).unwrap(); - - hits += cov.hits.len(); - lines += cov.lines.len(); + if let Some(cov) = self.file(&file) { + hits += cov.hits.len(); + lines += cov.lines.len(); + } } self.dircov @@ -198,10 +205,18 @@ impl Report { if let Some(root) = anc.last() { // at this point we know we've computed this - let dircov = *self.dircov.get(root).unwrap(); - - // we don't really care if we're overwriting it - overall.insert(root.to_path_buf(), dircov); + match self.dircov.get(root) { + Some(dircov) => { + // we don't really care if we're overwriting it + overall.insert(root.to_path_buf(), *dircov); + } + None => { + warn!( + "unable to get root for path for directory stats. root: {}", + root.display() + ); + } + } } } @@ -249,38 +264,42 @@ impl Report { false } - // wrapper to allow eronomic filtering with an option - fn filter_path + fmt::Debug>(path: P, filter: &Option) -> PathBuf { + // wrapper to allow ergonomic filtering with an option + fn filter_path + fmt::Debug>( + path: P, + filter: &Option, + ) -> Result { match filter { Some(regex) => { // we need our path as a string to regex it - let path_string = path - .as_ref() - .to_str() - .unwrap_or_else(|| panic!("could not utf8 decode path: {:?}", path)); + let path_string = path.as_ref().to_str().ok_or_else(|| { + format_err!("could not utf8 decode path: {}", path.as_ref().display()) + })?; let filtered = regex.replace(path_string, "").into_owned(); - PathBuf::from(filtered) + Ok(PathBuf::from(filtered)) } - None => path.as_ref().to_path_buf(), + None => Ok(path.as_ref().to_path_buf()), } } // wrapper to allow ergonomic testing of our include regex inside an option against a // path - fn relevant_path + fmt::Debug>(path: P, include: &Option) -> bool { + fn relevant_path + fmt::Debug>( + path: P, + include: &Option, + ) -> Result { match include { Some(regex) => { // we need our path as a string to regex it - let path_string = path - .as_ref() - .to_str() - .unwrap_or_else(|| panic!("could not utf8 decode path: {:?}", path)); + let path_string = path.as_ref().to_str().ok_or_else(|| { + format_err!("could not utf8 decode path: {}", path.as_ref().display()) + })?; - regex.is_match(path_string) + Ok(regex.is_match(path_string)) } - None => true, + None => Ok(true), } } @@ -332,10 +351,7 @@ impl Report { /// /// println!("{}", xml); /// ``` - pub fn cobertura( - &self, - filter_regex: Option<&str>, - ) -> Result> { + pub fn cobertura(&self, filter_regex: Option<&str>) -> Result { let filter = filter_regex.map(|f| Regex::new(f)).transpose()?; let mut backing: Vec = Vec::new(); @@ -347,7 +363,7 @@ impl Report { let unixtime = SystemTime::now() .duration_since(UNIX_EPOCH) - .expect("system time before unix epoch, wtf") + .context("system time before unix epoch")? .as_secs(); ew.write( @@ -380,7 +396,7 @@ impl Report { continue; } - let display_dir = Self::filter_path(dir, &filter).display().to_string(); + let display_dir = Self::filter_path(dir, &filter)?.display().to_string(); ew.write(XmlEvent::start_element("package").attr("name", &display_dir))?; ew.write(XmlEvent::start_element("classes"))?; @@ -390,9 +406,15 @@ impl Report { // for path in self.filter_files(dir) { - let display_path = Self::filter_path(path, &filter).display().to_string(); + let display_path = Self::filter_path(path, &filter)?.display().to_string(); - let filecov = self.file(path).unwrap(); + let filecov = match self.file(path) { + Some(filecov) => filecov, + None => { + warn!("unable to find coverage for path: {}", path.display()); + continue; + } + }; let file_srclocs: BTreeSet = filecov .lines diff --git a/src/agent/srcview/src/srcview.rs b/src/agent/srcview/src/srcview.rs index b001a450b..87120bd20 100644 --- a/src/agent/srcview/src/srcview.rs +++ b/src/agent/srcview/src/srcview.rs @@ -4,6 +4,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; +use anyhow::Result; use serde::{Deserialize, Serialize}; use crate::{ModOff, PdbCache, SrcLine}; @@ -43,11 +44,7 @@ impl SrcView { /// /// // you can now query sv for info from example.exe... /// ``` - pub fn insert>( - &mut self, - module: &str, - pdb: P, - ) -> Result, Box> { + pub fn insert>(&mut self, module: &str, pdb: P) -> Result> { let cache = PdbCache::new(pdb)?; Ok(self.0.insert(module.to_owned(), cache)) } diff --git a/src/ci/agent.sh b/src/ci/agent.sh index e8717a492..280211caa 100755 --- a/src/ci/agent.sh +++ b/src/ci/agent.sh @@ -55,6 +55,7 @@ cargo build --release --manifest-path ./onefuzz-telemetry/Cargo.toml --all-featu cp target/release/onefuzz-agent* ../../artifacts/agent-$(uname) cp target/release/onefuzz-supervisor* ../../artifacts/agent-$(uname) +cp target/release/srcview* ../../artifacts/agent-$(uname) if exists target/release/*.pdb; then for file in target/release/*.pdb; do