add POC srcview binary (#1146)

This commit is contained in:
bmc-msft
2021-08-25 10:30:44 -04:00
committed by GitHub
parent 5939cf8180
commit 2eaad6decc
9 changed files with 271 additions and 56 deletions

3
src/agent/Cargo.lock generated
View File

@ -2532,11 +2532,14 @@ dependencies = [
name = "srcview" name = "srcview"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow",
"env_logger 0.9.0",
"log", "log",
"nom 7.0.0", "nom 7.0.0",
"pdb", "pdb",
"regex", "regex",
"serde", "serde",
"structopt",
"xml-rs", "xml-rs",
] ]

1
src/agent/srcview/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
res/example.pdb

View File

@ -16,3 +16,6 @@ pdb = "0.7"
regex = "1" regex = "1"
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
xml-rs = "0.8" xml-rs = "0.8"
anyhow = "1.0"
structopt = "0.3"
env_logger = "0.9"

View File

@ -0,0 +1,178 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use anyhow::{format_err, Context, Result};
use srcview::{ModOff, Report, SrcLine, SrcView};
use std::fs;
use std::io::{stdout, Write};
use std::path::{Path, PathBuf};
use structopt::StructOpt;
// Top-level command line interface. Each variant is one subcommand, and `main`
// dispatches on the parsed variant to the function of the matching name.
//
// NOTE: plain `//` comments are used for the annotations here on purpose;
// structopt turns `///` doc comments into user-visible help text.
#[derive(StructOpt, Debug)]
enum Opt {
// handled by `srcloc`
Srcloc(SrcLocOpt),
// handled by `pdb_paths`
PdbPaths(PdbPathsOpt),
// handled by `cobertura`
Cobertura(CoberturaOpt),
/// Print 3rd-party license information
// handled by `licenses`; carries no arguments
Licenses,
}
/// Print the file paths in the provided PDB
// Arguments of the `PdbPaths` subcommand, consumed by `pdb_paths`.
#[derive(StructOpt, Debug)]
struct PdbPathsOpt {
// PDB file to load. `pdb_paths` also uses this path (lossily converted to a
// string) as the module name when inserting the PDB into the `SrcView`.
pdb_path: PathBuf,
}
/// Print modoffset file with file and source lines
// Arguments of the `Srcloc` subcommand, consumed by `srcloc`.
#[derive(StructOpt, Debug)]
struct SrcLocOpt {
// PDB file inserted into the `SrcView` used to resolve offsets
pdb_path: PathBuf,
// file that `srcloc` reads as text and hands to `ModOff::parse`
modoff_path: PathBuf,
// optional module name to register the PDB under; when absent, `srcloc` falls
// back to `add_common_extensions`, which guesses names from the PDB path
#[structopt(long)]
module_name: Option<String>,
}
/// Generate a Cobertura XML coverage report
///
/// Example:
/// srcview cobertura ./res/example.pdb res/example.txt
/// --include-regex "E:\\\\1f\\\\coverage\\\\"
/// --filter-regex "E:\\\\1f\\\\coverage\\\\"
/// --module-name example.exe
///
/// In this example, only files that live in E:\1f\coverage are included and
/// E:\1f\coverage is removed from the filenames in the resulting XML report.
// Arguments of the `Cobertura` subcommand, consumed by `cobertura`.
#[derive(StructOpt, Debug)]
struct CoberturaOpt {
// PDB file inserted into the `SrcView` used to resolve offsets
pdb_path: PathBuf,
// file that `cobertura` reads as text and hands to `ModOff::parse`
modoff_path: PathBuf,
// optional module name to register the PDB under; when absent, `cobertura`
// falls back to `add_common_extensions`, which guesses names from the PDB path
#[structopt(long)]
module_name: Option<String>,
// forwarded (when present) to `Report::new` as its include regex
/// regular expression that will be applied against the file paths from the
/// srcview
#[structopt(long)]
include_regex: Option<String>,
// forwarded (when present) to `Report::cobertura` as its filter regex
/// search and replace regular expression that is applied to all file
/// paths that will appear in the output report
#[structopt(long)]
filter_regex: Option<String>,
}
// Program entry point: set up logging, parse the command line and run the
// selected subcommand. Whatever the subcommand returns (success or error) is
// returned from `main` unchanged.
fn main() -> Result<()> {
    env_logger::init();

    match Opt::from_args() {
        Opt::Srcloc(args) => srcloc(args),
        Opt::PdbPaths(args) => pdb_paths(args),
        Opt::Cobertura(args) => cobertura(args),
        Opt::Licenses => licenses(),
    }
}
// Write the 3rd-party license data to stdout. The JSON file is embedded into
// the binary at compile time, so nothing is read from disk at runtime.
fn licenses() -> Result<()> {
    let license_data: &[u8] = include_bytes!("../../../data/licenses.json");

    let mut out = stdout();
    out.write_all(license_data)?;

    Ok(())
}
// Fallback used when the user did not say which module the PDB belongs to:
// guess likely module names from the PDB's file name and register the PDB
// under each of them.
//
// The names registered are the file stem on its own, followed by the stem with
// each of the `sys`, `exe` and `dll` extensions. This is a best-effort attempt
// to make sure the coverage report has something consumable.
//
// Errors if no file name / stem can be extracted from `pdb_path`, or if any of
// the inserts into `srcview` fails.
fn add_common_extensions(srcview: &mut SrcView, pdb_path: &Path) -> Result<()> {
    const COMMON_EXTENSIONS: [&str; 3] = ["sys", "exe", "dll"];

    let file_name = match pdb_path.file_name() {
        Some(name) => name,
        None => {
            return Err(format_err!(
                "unable to identify file name from path: {}",
                pdb_path.display()
            ));
        }
    };

    let stem = match Path::new(file_name).file_stem() {
        Some(os_stem) => os_stem.to_string_lossy(),
        None => {
            return Err(format_err!(
                "unable to identify file stem from path: {}",
                pdb_path.display()
            ));
        }
    };

    // bare module name first (no extension) ...
    srcview.insert(&stem, pdb_path)?;

    // ... then one entry per common module extension
    for ext in COMMON_EXTENSIONS.iter() {
        let module = format!("{}.{}", stem, ext);
        srcview.insert(&module, pdb_path)?;
    }

    Ok(())
}
fn srcloc(opts: SrcLocOpt) -> Result<()> {
let modoff_data = fs::read_to_string(&opts.modoff_path)
.with_context(|| format!("unable to read modoff_path: {}", opts.modoff_path.display()))?;
let modoffs = ModOff::parse(&modoff_data)?;
let mut srcview = SrcView::new();
if let Some(module_name) = &opts.module_name {
srcview.insert(module_name, &opts.pdb_path)?;
} else {
add_common_extensions(&mut srcview, &opts.pdb_path)?;
}
for modoff in &modoffs {
print!(" +{:04x} ", modoff.offset);
match srcview.modoff(modoff) {
Some(srcloc) => println!("{}", srcloc),
None => println!(),
}
}
Ok(())
}
// `PdbPaths` subcommand: load the PDB and print each file path it reports,
// one per line.
fn pdb_paths(opts: PdbPathsOpt) -> Result<()> {
    let pdb = &opts.pdb_path;
    // the PDB path (lossily converted to a string) doubles as the module name
    let module = pdb.to_string_lossy();

    let mut view = SrcView::new();
    view.insert(&module, pdb)?;

    for source_path in view.paths() {
        println!("{}", source_path.display());
    }

    Ok(())
}
fn cobertura(opts: CoberturaOpt) -> Result<()> {
// read our modoff file and parse it to a vector
let modoff_data = fs::read_to_string(&opts.modoff_path)?;
let modoffs = ModOff::parse(&modoff_data)?;
// create our new SrcView and insert our only pdb into it
// we don't know what the modoff module will be, so create a mapping from
// all likely names to the pdb
let mut srcview = SrcView::new();
if let Some(module_name) = &opts.module_name {
srcview.insert(module_name, &opts.pdb_path)?;
} else {
add_common_extensions(&mut srcview, &opts.pdb_path)?;
}
// Convert our ModOffs to SrcLine so we can draw it
let coverage: Vec<SrcLine> = modoffs
.into_iter()
.filter_map(|m| srcview.modoff(&m))
.collect();
// Generate our report, filtering on our example path
let r = Report::new(&coverage, &srcview, opts.include_regex.as_deref())?;
// Format it as cobertura and display it
let formatted = r.cobertura(opts.filter_regex.as_deref())?;
println!("{}", formatted);
Ok(())
}

View File

@ -149,49 +149,55 @@ impl ModOff {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use anyhow::Result;
#[test] #[test]
fn parse_empty() { fn parse_empty() -> Result<()> {
let empty: Vec<ModOff> = Vec::new(); let empty: Vec<ModOff> = Vec::new();
assert_eq!(empty, ModOff::parse("").unwrap()); assert_eq!(empty, ModOff::parse("")?);
Ok(())
} }
#[test] #[test]
fn parse_good() { fn parse_good() -> Result<()> {
assert_eq!( assert_eq!(
vec![ModOff::new("foo.exe", 0x4141)], vec![ModOff::new("foo.exe", 0x4141)],
ModOff::parse("foo.exe+4141").unwrap() ModOff::parse("foo.exe+4141")?
); );
Ok(())
} }
#[test] #[test]
fn parse_good_multiple_unix() { fn parse_good_multiple_unix() -> Result<()> {
assert_eq!( assert_eq!(
vec![ vec![
ModOff::new("foo.exe", 0x4141), ModOff::new("foo.exe", 0x4141),
ModOff::new("foo.exe", 0x4242) ModOff::new("foo.exe", 0x4242)
], ],
ModOff::parse("foo.exe+4141\nfoo.exe+4242").unwrap() ModOff::parse("foo.exe+4141\nfoo.exe+4242")?
); );
Ok(())
} }
#[test] #[test]
fn parse_good_multiple_windows() { fn parse_good_multiple_windows() -> Result<()> {
assert_eq!( assert_eq!(
vec![ vec![
ModOff::new("foo.exe", 0x4141), ModOff::new("foo.exe", 0x4141),
ModOff::new("foo.exe", 0x4242), ModOff::new("foo.exe", 0x4242),
], ],
ModOff::parse("foo.exe+4141\r\nfoo.exe+4242").unwrap() ModOff::parse("foo.exe+4141\r\nfoo.exe+4242")?
); );
Ok(())
} }
#[test] #[test]
fn parse_good_leading_0x() { fn parse_good_leading_0x() -> Result<()> {
assert_eq!( assert_eq!(
vec![ModOff::new("foo.exe", 0x4141)], vec![ModOff::new("foo.exe", 0x4141)],
ModOff::parse("foo.exe+0x4141").unwrap() ModOff::parse("foo.exe+0x4141")?
); );
Ok(())
} }
#[test] #[test]

View File

@ -5,6 +5,7 @@ use std::collections::BTreeMap;
use std::fs::File; use std::fs::File;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::{format_err, Result};
use log::*; use log::*;
use pdb::{FallibleIterator, SymbolData, PDB}; use pdb::{FallibleIterator, SymbolData, PDB};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -20,7 +21,7 @@ pub struct PdbCache {
} }
impl PdbCache { impl PdbCache {
pub fn new<P: AsRef<Path>>(pdb: P) -> Result<Self, Box<dyn std::error::Error>> { pub fn new<P: AsRef<Path>>(pdb: P) -> Result<Self> {
let mut offset_to_line: BTreeMap<usize, SrcLine> = BTreeMap::new(); let mut offset_to_line: BTreeMap<usize, SrcLine> = BTreeMap::new();
let mut symbol_to_lines: BTreeMap<String, Vec<SrcLine>> = BTreeMap::new(); let mut symbol_to_lines: BTreeMap<String, Vec<SrcLine>> = BTreeMap::new();
let mut path_to_symbols: BTreeMap<PathBuf, Vec<String>> = BTreeMap::new(); let mut path_to_symbols: BTreeMap<PathBuf, Vec<String>> = BTreeMap::new();
@ -50,7 +51,10 @@ impl PdbCache {
if let Ok(SymbolData::Procedure(proc)) = symbol.parse() { if let Ok(SymbolData::Procedure(proc)) = symbol.parse() {
let mut lines = program.lines_at_offset(proc.offset); let mut lines = program.lines_at_offset(proc.offset);
while let Some(line_info) = lines.next()? { while let Some(line_info) = lines.next()? {
let rva = line_info.offset.to_rva(&address_map).expect("invalid rva"); let rva = line_info
.offset
.to_rva(&address_map)
.ok_or_else(|| format_err!("invalid RVA: {:?}", line_info))?;
let file_info = program.get_file_info(line_info.file_index)?; let file_info = program.get_file_info(line_info.file_index)?;
let file_name = file_info.name.to_string_lossy(&string_table)?; let file_name = file_info.name.to_string_lossy(&string_table)?;

View File

@ -6,6 +6,8 @@ use std::fmt;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use anyhow::{format_err, Context, Result};
use log::warn;
use regex::Regex; use regex::Regex;
use xml::writer::{EmitterConfig, XmlEvent}; use xml::writer::{EmitterConfig, XmlEvent};
@ -79,9 +81,9 @@ impl Report {
coverage: &[SrcLine], coverage: &[SrcLine],
srcview: &SrcView, srcview: &SrcView,
include_regex: Option<&str>, include_regex: Option<&str>,
) -> Result<Self, Box<dyn std::error::Error>> { ) -> Result<Self> {
let include = include_regex.map(|f| Regex::new(f)).transpose()?; let include = include_regex.map(|f| Regex::new(f)).transpose()?;
let filecov = Self::compute_filecov(coverage, srcview, &include); let filecov = Self::compute_filecov(coverage, srcview, &include)?;
// should this function take &[ModOff] and perform the conversion itself? // should this function take &[ModOff] and perform the conversion itself?
@ -102,19 +104,21 @@ impl Report {
coverage: &[SrcLine], coverage: &[SrcLine],
srcview: &SrcView, srcview: &SrcView,
include: &Option<Regex>, include: &Option<Regex>,
) -> BTreeMap<PathBuf, FileCov> { ) -> Result<BTreeMap<PathBuf, FileCov>> {
let uniq_cov: BTreeSet<SrcLine> = coverage.iter().cloned().collect(); let uniq_cov: BTreeSet<SrcLine> = coverage.iter().cloned().collect();
let mut filecov = BTreeMap::new(); let mut filecov = BTreeMap::new();
for path in srcview.paths() { for path in srcview.paths() {
if !Self::relevant_path(path, include) { if !Self::relevant_path(path, include)? {
continue; continue;
} }
let path_srclocs: Vec<SrcLine> = srcview let path_srclocs: Vec<SrcLine> = srcview
.path_lines(path) .path_lines(path)
.unwrap() .ok_or_else(|| {
format_err!("unable to find path lines in path: {}", path.display())
})?
.map(|line| SrcLine::new(path, line)) .map(|line| SrcLine::new(path, line))
.collect(); .collect();
@ -135,8 +139,11 @@ impl Report {
if let Some(path_symbols) = srcview.path_symbols(path) { if let Some(path_symbols) = srcview.path_symbols(path) {
for symbol in path_symbols { for symbol in path_symbols {
let symbol_srclocs: BTreeSet<SrcLine> = let symbol_srclocs: BTreeSet<SrcLine> = srcview
srcview.symbol(&symbol).unwrap().cloned().collect(); .symbol(&symbol)
.ok_or_else(|| format_err!("unable to resolve symbol: {}", symbol))?
.cloned()
.collect();
symbols.insert(symbol, symbol_srclocs); symbols.insert(symbol, symbol_srclocs);
} }
@ -152,7 +159,7 @@ impl Report {
); );
} }
filecov Ok(filecov)
} }
// should only be called from `new`, function to initialize directory coverage and overall // should only be called from `new`, function to initialize directory coverage and overall
@ -183,10 +190,10 @@ impl Report {
// get every file that matches this directory and total it // get every file that matches this directory and total it
for file in self.filter_files(dir) { for file in self.filter_files(dir) {
let cov = self.file(&file).unwrap(); if let Some(cov) = self.file(&file) {
hits += cov.hits.len();
hits += cov.hits.len(); lines += cov.lines.len();
lines += cov.lines.len(); }
} }
self.dircov self.dircov
@ -198,10 +205,18 @@ impl Report {
if let Some(root) = anc.last() { if let Some(root) = anc.last() {
// at this point we know we've computed this // at this point we know we've computed this
let dircov = *self.dircov.get(root).unwrap(); match self.dircov.get(root) {
Some(dircov) => {
// we don't really care if we're overwriting it // we don't really care if we're overwriting it
overall.insert(root.to_path_buf(), dircov); overall.insert(root.to_path_buf(), *dircov);
}
None => {
warn!(
"unable to get root for path for directory stats. root: {}",
root.display()
);
}
}
} }
} }
@ -249,38 +264,42 @@ impl Report {
false false
} }
// wrapper to allow eronomic filtering with an option // wrapper to allow ergonomic filtering with an option
fn filter_path<P: AsRef<Path> + fmt::Debug>(path: P, filter: &Option<Regex>) -> PathBuf { fn filter_path<P: AsRef<Path> + fmt::Debug>(
path: P,
filter: &Option<Regex>,
) -> Result<PathBuf> {
match filter { match filter {
Some(regex) => { Some(regex) => {
// we need our path as a string to regex it // we need our path as a string to regex it
let path_string = path let path_string = path.as_ref().to_str().ok_or_else(|| {
.as_ref() format_err!("could not utf8 decode path: {}", path.as_ref().display())
.to_str() })?;
.unwrap_or_else(|| panic!("could not utf8 decode path: {:?}", path));
let filtered = regex.replace(path_string, "").into_owned(); let filtered = regex.replace(path_string, "").into_owned();
PathBuf::from(filtered) Ok(PathBuf::from(filtered))
} }
None => path.as_ref().to_path_buf(), None => Ok(path.as_ref().to_path_buf()),
} }
} }
// wrapper to allow ergonomic testing of our include regex inside an option against a // wrapper to allow ergonomic testing of our include regex inside an option against a
// path // path
fn relevant_path<P: AsRef<Path> + fmt::Debug>(path: P, include: &Option<Regex>) -> bool { fn relevant_path<P: AsRef<Path> + fmt::Debug>(
path: P,
include: &Option<Regex>,
) -> Result<bool> {
match include { match include {
Some(regex) => { Some(regex) => {
// we need our path as a string to regex it // we need our path as a string to regex it
let path_string = path let path_string = path.as_ref().to_str().ok_or_else(|| {
.as_ref() format_err!("could not utf8 decode path: {}", path.as_ref().display())
.to_str() })?;
.unwrap_or_else(|| panic!("could not utf8 decode path: {:?}", path));
regex.is_match(path_string) Ok(regex.is_match(path_string))
} }
None => true, None => Ok(true),
} }
} }
@ -332,10 +351,7 @@ impl Report {
/// ///
/// println!("{}", xml); /// println!("{}", xml);
/// ``` /// ```
pub fn cobertura( pub fn cobertura(&self, filter_regex: Option<&str>) -> Result<String> {
&self,
filter_regex: Option<&str>,
) -> Result<String, Box<dyn std::error::Error>> {
let filter = filter_regex.map(|f| Regex::new(f)).transpose()?; let filter = filter_regex.map(|f| Regex::new(f)).transpose()?;
let mut backing: Vec<u8> = Vec::new(); let mut backing: Vec<u8> = Vec::new();
@ -347,7 +363,7 @@ impl Report {
let unixtime = SystemTime::now() let unixtime = SystemTime::now()
.duration_since(UNIX_EPOCH) .duration_since(UNIX_EPOCH)
.expect("system time before unix epoch, wtf") .context("system time before unix epoch")?
.as_secs(); .as_secs();
ew.write( ew.write(
@ -380,7 +396,7 @@ impl Report {
continue; continue;
} }
let display_dir = Self::filter_path(dir, &filter).display().to_string(); let display_dir = Self::filter_path(dir, &filter)?.display().to_string();
ew.write(XmlEvent::start_element("package").attr("name", &display_dir))?; ew.write(XmlEvent::start_element("package").attr("name", &display_dir))?;
ew.write(XmlEvent::start_element("classes"))?; ew.write(XmlEvent::start_element("classes"))?;
@ -390,9 +406,15 @@ impl Report {
// //
for path in self.filter_files(dir) { for path in self.filter_files(dir) {
let display_path = Self::filter_path(path, &filter).display().to_string(); let display_path = Self::filter_path(path, &filter)?.display().to_string();
let filecov = self.file(path).unwrap(); let filecov = match self.file(path) {
Some(filecov) => filecov,
None => {
warn!("unable to find coverage for path: {}", path.display());
continue;
}
};
let file_srclocs: BTreeSet<SrcLine> = filecov let file_srclocs: BTreeSet<SrcLine> = filecov
.lines .lines

View File

@ -4,6 +4,7 @@
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{ModOff, PdbCache, SrcLine}; use crate::{ModOff, PdbCache, SrcLine};
@ -43,11 +44,7 @@ impl SrcView {
/// ///
/// // you can now query sv for info from example.exe... /// // you can now query sv for info from example.exe...
/// ``` /// ```
pub fn insert<P: AsRef<Path>>( pub fn insert<P: AsRef<Path>>(&mut self, module: &str, pdb: P) -> Result<Option<PdbCache>> {
&mut self,
module: &str,
pdb: P,
) -> Result<Option<PdbCache>, Box<dyn std::error::Error>> {
let cache = PdbCache::new(pdb)?; let cache = PdbCache::new(pdb)?;
Ok(self.0.insert(module.to_owned(), cache)) Ok(self.0.insert(module.to_owned(), cache))
} }

View File

@ -55,6 +55,7 @@ cargo build --release --manifest-path ./onefuzz-telemetry/Cargo.toml --all-featu
cp target/release/onefuzz-agent* ../../artifacts/agent-$(uname) cp target/release/onefuzz-agent* ../../artifacts/agent-$(uname)
cp target/release/onefuzz-supervisor* ../../artifacts/agent-$(uname) cp target/release/onefuzz-supervisor* ../../artifacts/agent-$(uname)
cp target/release/srcview* ../../artifacts/agent-$(uname)
if exists target/release/*.pdb; then if exists target/release/*.pdb; then
for file in target/release/*.pdb; do for file in target/release/*.pdb; do