initial public release

Brian Caswell
2020-09-18 12:21:04 -04:00
parent 9c3aa0bdfb
commit d3a0b292e6
387 changed files with 43810 additions and 28 deletions

@@ -0,0 +1,34 @@
[package]
name = "onefuzz-agent"
version = "0.2.0"
authors = ["fuzzing@microsoft.com"]
edition = "2018"
publish = false
license = "MIT"
[features]
integration_test = []
[dependencies]
anyhow = "1.0"
appinsights = "0.1"
async-trait = "0.1"
clap = "2.33"
env_logger = "0.7"
futures = "0.3"
hex = "0.4"
lazy_static = "1.4"
log = "0.4"
num_cpus = "1.13"
reqwest = { version = "0.10", features = ["json", "stream"] }
serde = "1.0"
serde_json = "1.0"
tokio = { version = "0.2", features = ["full"] }
tokio-util = { version = "0.3", features = ["full"] }
url = { version = "2.1", features = ["serde"] }
uuid = { version = "0.8", features = ["serde", "v4"] }
onefuzz = { path = "../onefuzz" }
storage-queue = { path = "../storage-queue" }
[dev-dependencies]
tempfile = "3.1"
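
The empty `integration_test` feature above is an opt-in switch for long-running tests. A sketch of the gating pattern (the supervisor tests later in this commit use exactly this shape; the test name here is hypothetical):

// Runs only under `cargo test --features integration_test`;
// without the feature the test is compiled but marked ignored.
#[tokio::test]
#[cfg_attr(not(feature = "integration_test"), ignore)]
async fn slow_end_to_end_test() {
    // exercise the agent end-to-end here
}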

@@ -0,0 +1,36 @@
use std::error::Error;
use std::fs::File;
use std::io::prelude::*;
use std::process::Command;
fn run_cmd(args: &[&str]) -> Result<String, Box<dyn Error>> {
let cmd = Command::new(args[0]).args(&args[1..]).output()?;
if cmd.status.success() {
Ok(String::from_utf8_lossy(&cmd.stdout).to_string())
} else {
Err(From::from("failed"))
}
}
fn read_file(filename: &str) -> Result<String, Box<dyn Error>> {
let mut file = File::open(filename)?;
let mut contents = String::new();
file.read_to_string(&mut contents)?;
Ok(contents)
}
fn main() -> Result<(), Box<dyn Error>> {
// git output ends with a newline; trim it so the -local_changes suffix stays on the same line
let sha = run_cmd(&["git", "rev-parse", "HEAD"])?.trim().to_string();
let with_changes = if run_cmd(&["git", "diff", "--quiet"]).is_err() {
"-local_changes"
} else {
""
};
println!("cargo:rustc-env=GIT_VERSION={}{}", sha, with_changes);
let version = read_file("../../../CURRENT_VERSION")?.trim().to_string();
println!("cargo:rustc-env=ONEFUZZ_VERSION={}", version);
Ok(())
}
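
The two `cargo:rustc-env` directives above inject GIT_VERSION and ONEFUZZ_VERSION into the crate's compile-time environment; main.rs later in this commit reads them back with `env!`, roughly:

// Compile-time constants provided by this build script.
let built_version = format!("onefuzz:{} git:{}", env!("ONEFUZZ_VERSION"), env!("GIT_VERSION"));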

@@ -0,0 +1,28 @@
start_time : 1587433304
last_update : 1587433306
fuzzer_pid : 26515
cycles_done : 8
execs_done : 13655
execs_per_sec : 2666.67
paths_total : 2
paths_favored : 2
paths_found : 1
paths_imported : 0
max_depth : 2
cur_path : 0
pending_favs : 0
pending_total : 0
variable_paths : 0
stability : 100.00%
bitmap_cvg : 0.01%
unique_crashes : 0
unique_hangs : 0
last_path : 1587433304
last_crash : 0
last_hang : 0
execs_since_crash : 13655
exec_timeout : 20
afl_banner : fuzz.exe
afl_version : 2.52b
target_mode : default
command_line : afl-fuzz -i inputs -o OUT -- /home/bcaswell/projects/onefuzz/samples/afl-clang/fuzz.exe
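
The fixture above is AFL's `fuzzer_stats` format: one `key : value` pair per line. A minimal parsing sketch for illustration (the crate's actual reader, `read_stats` in tasks::stats::afl, is not part of this section and may differ):

use std::collections::HashMap;

fn parse_fuzzer_stats(text: &str) -> HashMap<String, String> {
    text.lines()
        .filter_map(|line| {
            // split on the first ':' only; values such as command_line contain spaces
            let mut parts = line.splitn(2, ':');
            let key = parts.next()?.trim().to_string();
            let value = parts.next()?.trim().to_string();
            Some((key, value))
        })
        .collect()
}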

@@ -0,0 +1,24 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use anyhow::Result;
use clap::{App, SubCommand};
pub fn run(args: &clap::ArgMatches) -> Result<()> {
match args.subcommand() {
("generic-crash-report", Some(sub)) => crate::debug::generic_crash_report::run(sub)?,
("libfuzzer-coverage", Some(sub)) => crate::debug::libfuzzer_coverage::run(sub)?,
("libfuzzer-crash-report", Some(sub)) => crate::debug::libfuzzer_crash_report::run(sub)?,
_ => println!("missing subcommand\nUSAGE : {}", args.usage()),
}
Ok(())
}
pub fn args() -> App<'static, 'static> {
SubCommand::with_name("debug")
.about("unsupported internal debugging commands")
.subcommand(crate::debug::generic_crash_report::args())
.subcommand(crate::debug::libfuzzer_coverage::args())
.subcommand(crate::debug::libfuzzer_crash_report::args())
}

@@ -0,0 +1,121 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
report::generic::{Config, GenericReportProcessor},
utils::parse_key_value,
};
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use onefuzz::blob::BlobContainerUrl;
use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use tokio::runtime::Runtime;
use url::Url;
use uuid::Uuid;
async fn run_impl(input: String, config: Config) -> Result<()> {
let input_path = Path::new(&input);
let test_url = Url::parse("https://contoso.com/sample-container/blob.txt")?;
let processor = GenericReportProcessor::new(&config);
let result = processor.test_input(test_url, input_path).await?;
println!("{:#?}", result);
Ok(())
}
pub fn run(args: &clap::ArgMatches) -> Result<()> {
let target_exe = value_t!(args, "target_exe", PathBuf)?;
let input = value_t!(args, "input", String)?;
let target_timeout = value_t!(args, "target_timeout", u64).ok();
let check_retry_count = value_t!(args, "check_retry_count", u64)?;
let target_options = args.values_of_lossy("target_options").unwrap_or_default();
let check_asan_log = args.is_present("check_asan_log");
let check_debugger = !args.is_present("disable_check_debugger");
let mut target_env = HashMap::new();
for opt in args.values_of_lossy("target_env").unwrap_or_default() {
let (k, v) = parse_key_value(opt)?;
target_env.insert(k, v);
}
let config = Config {
target_exe,
target_env,
target_options,
target_timeout,
check_asan_log,
check_debugger,
check_retry_count,
crashes: None,
input_queue: None,
no_repro: None,
reports: None,
unique_reports: SyncedDir {
path: "unique_reports".into(),
url: BlobContainerUrl::new(url::Url::parse("https://contoso.com/unique_reports")?)?,
},
common: CommonConfig {
heartbeat_queue: None,
instrumentation_key: None,
telemetry_key: None,
job_id: Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
task_id: Uuid::parse_str("11111111-1111-1111-1111-111111111111").unwrap(),
},
};
let mut rt = Runtime::new()?;
rt.block_on(async { run_impl(input, config).await })?;
Ok(())
}
pub fn args() -> App<'static, 'static> {
SubCommand::with_name("generic-crash-report")
.about("execute a local-only generic crash report")
.arg(
Arg::with_name("target_exe")
.takes_value(true)
.required(true),
)
.arg(Arg::with_name("input").takes_value(true).required(true))
.arg(
Arg::with_name("disable_check_debugger")
.takes_value(false)
.long("disable_check_debugger"),
)
.arg(
Arg::with_name("check_asan_log")
.takes_value(false)
.long("check_asan_log"),
)
.arg(
Arg::with_name("check_retry_count")
.takes_value(true)
.long("check_retry_count")
.default_value("0"),
)
.arg(
Arg::with_name("target_timeout")
.takes_value(true)
.long("target_timeout")
.default_value("5"),
)
.arg(
Arg::with_name("target_env")
.long("target_env")
.takes_value(true)
.multiple(true),
)
.arg(
Arg::with_name("target_options")
.long("target_options")
.takes_value(true)
.multiple(true)
.allow_hyphen_values(true)
.default_value("{input}")
.help("Supports hyphens. Recommendation: Set target_env first"),
)
}

@@ -0,0 +1,105 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
coverage::libfuzzer_coverage::{Config, CoverageProcessor},
utils::parse_key_value,
};
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use onefuzz::blob::BlobContainerUrl;
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use tokio::runtime::Runtime;
use url::Url;
use uuid::Uuid;
async fn run_impl(input: String, config: Config) -> Result<()> {
let mut processor = CoverageProcessor::new(Arc::new(config))
.await
.map_err(|e| format_err!("coverage processor failed: {:?}", e))?;
let input_path = Path::new(&input);
processor
.test_input(input_path)
.await
.map_err(|e| format_err!("test input failed {:?}", e))?;
let info = processor
.total
.info()
.await
.map_err(|e| format_err!("coverage_info failed {:?}", e))?;
println!("{:?}", info);
Ok(())
}
pub fn run(args: &clap::ArgMatches) -> Result<()> {
let target_exe = value_t!(args, "target_exe", PathBuf)?;
let input = value_t!(args, "input", String)?;
let result_dir = value_t!(args, "result_dir", String)?;
let target_options = args.values_of_lossy("target_options").unwrap_or_default();
let mut target_env = HashMap::new();
for opt in args.values_of_lossy("target_env").unwrap_or_default() {
let (k, v) = parse_key_value(opt)?;
target_env.insert(k, v);
}
let config = Config {
target_exe,
target_env,
target_options,
input_queue: None,
readonly_inputs: vec![],
coverage: SyncedDir {
path: result_dir.into(),
url: BlobContainerUrl::new(Url::parse("https://contoso.com/coverage")?)?,
},
common: CommonConfig {
heartbeat_queue: None,
instrumentation_key: None,
telemetry_key: None,
job_id: Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
task_id: Uuid::parse_str("11111111-1111-1111-1111-111111111111").unwrap(),
},
};
let mut rt = Runtime::new()?;
rt.block_on(run_impl(input, config))?;
Ok(())
}
pub fn args() -> App<'static, 'static> {
SubCommand::with_name("libfuzzer-coverage")
.about("execute a local-only libfuzzer coverage task")
.arg(
Arg::with_name("target_exe")
.takes_value(true)
.required(true),
)
.arg(Arg::with_name("input").takes_value(true).required(true))
.arg(
Arg::with_name("result_dir")
.takes_value(true)
.required(true),
)
.arg(
Arg::with_name("target_env")
.long("target_env")
.takes_value(true)
.multiple(true),
)
.arg(
Arg::with_name("target_options")
.long("target_options")
.takes_value(true)
.multiple(true)
.allow_hyphen_values(true)
.default_value("{input}")
.help("Supports hyphens. Recommendation: Set target_env first"),
)
}

@@ -0,0 +1,106 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
report::libfuzzer_report::{AsanProcessor, Config},
utils::parse_key_value,
};
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use onefuzz::blob::BlobContainerUrl;
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use tokio::runtime::Runtime;
use url::Url;
use uuid::Uuid;
async fn run_impl(input: String, config: Config) -> Result<()> {
let task = AsanProcessor::new(Arc::new(config));
let test_url = Url::parse("https://contoso.com/sample-container/blob.txt")?;
let input_path = Path::new(&input);
let result = task.test_input(test_url, &input_path).await;
println!("{:#?}", result);
Ok(())
}
pub fn run(args: &clap::ArgMatches) -> Result<()> {
let target_exe = value_t!(args, "target_exe", PathBuf)?;
let input = value_t!(args, "input", String)?;
let target_options = args.values_of_lossy("target_options").unwrap_or_default();
let mut target_env = HashMap::new();
for opt in args.values_of_lossy("target_env").unwrap_or_default() {
let (k, v) = parse_key_value(opt)?;
target_env.insert(k, v);
}
let target_timeout = value_t!(args, "target_timeout", u64).ok();
let check_retry_count = value_t!(args, "check_retry_count", u64)?;
let config = Config {
target_exe,
target_env,
target_options,
target_timeout,
check_retry_count,
input_queue: None,
crashes: None,
reports: None,
no_repro: None,
unique_reports: SyncedDir {
path: "unique_reports".into(),
url: BlobContainerUrl::new(Url::parse("https://contoso.com/unique_reports")?)?,
},
common: CommonConfig {
heartbeat_queue: None,
instrumentation_key: None,
telemetry_key: None,
job_id: Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
task_id: Uuid::parse_str("11111111-1111-1111-1111-111111111111").unwrap(),
},
};
let mut rt = Runtime::new()?;
rt.block_on(async { run_impl(input, config).await })?;
Ok(())
}
pub fn args() -> App<'static, 'static> {
SubCommand::with_name("libfuzzer-crash-report")
.about("execute a local-only libfuzzer crash report task")
.arg(
Arg::with_name("target_exe")
.takes_value(true)
.required(true),
)
.arg(Arg::with_name("input").takes_value(true).required(true))
.arg(
Arg::with_name("target_env")
.long("target_env")
.takes_value(true)
.multiple(true),
)
.arg(
Arg::with_name("target_options")
.long("target_options")
.takes_value(true)
.multiple(true)
.allow_hyphen_values(true)
.help("Supports hyphens. Recommendation: Set target_env first"),
)
.arg(
Arg::with_name("target_timeout")
.takes_value(true)
.long("target_timeout"),
)
.arg(
Arg::with_name("check_retry_count")
.takes_value(true)
.long("check_retry_count")
.default_value("0"),
)
}

@@ -0,0 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod cmd;
pub mod generic_crash_report;
pub mod libfuzzer_coverage;
pub mod libfuzzer_crash_report;

@@ -0,0 +1,100 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#[macro_use]
extern crate anyhow;
#[macro_use]
extern crate onefuzz;
#[macro_use]
extern crate clap;
use std::path::PathBuf;
use anyhow::Result;
use clap::{App, Arg, SubCommand};
use onefuzz::telemetry::{self};
mod debug;
mod tasks;
use tasks::config::Config;
fn main() -> Result<()> {
env_logger::init();
let built_version = format!(
"{} onefuzz:{} git:{}",
crate_version!(),
env!("ONEFUZZ_VERSION"),
env!("GIT_VERSION")
);
let app = App::new("onefuzz-agent")
.version(built_version.as_str())
.arg(
Arg::with_name("config")
.long("config")
.short("c")
.takes_value(true),
)
.subcommand(debug::cmd::args())
.subcommand(SubCommand::with_name("licenses").about("display third-party licenses"));
let matches = app.get_matches();
match matches.subcommand() {
("licenses", Some(_)) => {
return licenses();
}
("debug", Some(sub)) => return crate::debug::cmd::run(sub),
_ => {} // no subcommand
}
if matches.value_of("config").is_none() {
println!("Missing '--config'\n{}", matches.usage());
return Ok(());
}
let config_path: PathBuf = matches.value_of("config").unwrap().parse()?;
let config = Config::from_file(config_path)?;
init_telemetry(&config);
verbose!("config parsed");
let mut rt = tokio::runtime::Runtime::new()?;
let result = rt.block_on(config.run());
if let Err(err) = &result {
error!("error running task: {}", err);
}
telemetry::try_flush_and_close();
result
}
fn licenses() -> Result<()> {
use std::io::{self, Write};
io::stdout().write_all(include_bytes!("../../data/licenses.json"))?;
Ok(())
}
fn init_telemetry(config: &Config) {
let inst_key = config
.common()
.instrumentation_key
.map(|k| k.to_string())
.unwrap_or_else(|| "".to_string());
let tele_key = config
.common()
.telemetry_key
.map(|k| k.to_string())
.unwrap_or_else(|| "".to_string());
telemetry::set_appinsights_clients(inst_key, tele_key);
}

@@ -0,0 +1,147 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
heartbeat::HeartbeatSender,
utils,
};
use anyhow::Result;
use futures::stream::StreamExt;
use onefuzz::{az_copy, blob::url::BlobUrl};
use onefuzz::{expand::Expand, fs::set_executable, fs::OwnedDir};
use reqwest::Url;
use serde::Deserialize;
use std::{
collections::HashMap,
path::{Path, PathBuf},
str,
};
use storage_queue::{QueueClient, EMPTY_QUEUE_DELAY};
use tokio::{fs, process::Command};
#[derive(Debug, Deserialize)]
pub struct Config {
pub analyzer_exe: String,
pub analyzer_options: Vec<String>,
pub analyzer_env: HashMap<String, String>,
pub target_exe: PathBuf,
pub target_options: Vec<String>,
pub input_queue: Option<Url>,
pub crashes: Option<SyncedDir>,
pub analysis: SyncedDir,
pub tools: SyncedDir,
#[serde(flatten)]
pub common: CommonConfig,
}
pub async fn spawn(config: Config) -> Result<()> {
let tmp_dir = PathBuf::from(format!("./{}/tmp", config.common.task_id));
let tmp = OwnedDir::new(tmp_dir);
tmp.reset().await?;
utils::init_dir(&config.analysis.path).await?;
utils::init_dir(&config.tools.path).await?;
utils::sync_remote_dir(&config.tools, utils::SyncOperation::Pull).await?;
set_executable(&config.tools.path).await?;
run_existing(&config).await?;
poll_inputs(&config, tmp).await?;
Ok(())
}
async fn run_existing(config: &Config) -> Result<()> {
if let Some(crashes) = &config.crashes {
utils::init_dir(&crashes.path).await?;
utils::sync_remote_dir(&crashes, utils::SyncOperation::Pull).await?;
let mut read_dir = fs::read_dir(&crashes.path).await?;
while let Some(file) = read_dir.next().await {
verbose!("Processing file {:?}", file);
let file = file?;
run_tool(file.path(), &config).await?;
}
utils::sync_remote_dir(&config.analysis, utils::SyncOperation::Push).await?;
}
Ok(())
}
async fn already_checked(config: &Config, input: &BlobUrl) -> Result<bool> {
let result = if let Some(crashes) = &config.crashes {
crashes.url.account() == input.account()
&& crashes.url.container() == input.container()
&& crashes.path.join(input.name()).exists()
} else {
false
};
Ok(result)
}
async fn poll_inputs(config: &Config, tmp_dir: OwnedDir) -> Result<()> {
let heartbeat = config.common.init_heartbeat();
if let Some(queue) = &config.input_queue {
let mut input_queue = QueueClient::new(queue.clone());
loop {
heartbeat.alive();
if let Some(message) = input_queue.pop().await? {
let input_url = match BlobUrl::parse(str::from_utf8(message.data())?) {
Ok(url) => url,
Err(err) => {
error!("could not parse input URL from queue message: {}", err);
return Ok(());
}
};
if !already_checked(&config, &input_url).await? {
let file_name = input_url.name();
let mut destination_path = PathBuf::from(tmp_dir.path());
destination_path.push(file_name);
az_copy::copy(input_url.url().as_ref(), &destination_path, false).await?;
run_tool(destination_path, &config).await?;
utils::sync_remote_dir(&config.analysis, utils::SyncOperation::Push).await?;
}
input_queue.delete(message).await?;
} else {
warn!("no new candidate inputs found, sleeping");
tokio::time::delay_for(EMPTY_QUEUE_DELAY).await;
}
}
}
Ok(())
}
pub async fn run_tool(input: impl AsRef<Path>, config: &Config) -> Result<()> {
let mut tool_args = Expand::new();
tool_args
.input(&input)
.target_exe(&config.target_exe)
.target_options(&config.target_options)
.analyzer_exe(&config.analyzer_exe)
.analyzer_options(&config.analyzer_options)
.output_dir(&config.analysis.path);
let analyzer_path = Expand::new()
.tools_dir(&config.tools.path)
.evaluate_value(&config.analyzer_exe)?;
let mut cmd = Command::new(analyzer_path);
cmd.kill_on_drop(true).env_remove("RUST_LOG");
for arg in tool_args.evaluate(&config.analyzer_options)? {
cmd.arg(arg);
}
for (k, v) in &config.analyzer_env {
cmd.env(k, tool_args.evaluate_value(v)?);
}
cmd.output().await?;
Ok(())
}

@@ -0,0 +1,4 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod generic;

@@ -0,0 +1,150 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#![allow(clippy::large_enum_variant)]
use crate::tasks::{analysis, coverage, fuzz, heartbeat::*, merge, report};
use anyhow::Result;
use onefuzz::{
blob::BlobContainerUrl,
machine_id::get_machine_id,
telemetry::{self, Event::task_start, EventData},
};
use reqwest::Url;
use serde::{self, Deserialize};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use uuid::Uuid;
#[derive(Debug, Deserialize, PartialEq, Clone)]
pub enum ContainerType {
#[serde(alias = "inputs")]
Inputs,
}
#[derive(Debug, Deserialize, Clone)]
pub struct CommonConfig {
pub job_id: Uuid,
pub task_id: Uuid,
pub instrumentation_key: Option<Uuid>,
pub heartbeat_queue: Option<Url>,
pub telemetry_key: Option<Uuid>,
}
impl CommonConfig {
pub fn init_heartbeat(&self) -> Option<HeartbeatClient> {
self.heartbeat_queue
.clone()
.map(|url| HeartbeatClient::init(url, self.task_id))
}
}
#[derive(Debug, Deserialize)]
#[serde(tag = "task_type")]
pub enum Config {
#[serde(alias = "libfuzzer_fuzz")]
LibFuzzerFuzz(fuzz::libfuzzer_fuzz::Config),
#[serde(alias = "libfuzzer_crash_report")]
LibFuzzerReport(report::libfuzzer_report::Config),
#[serde(alias = "libfuzzer_merge")]
LibFuzzerMerge(merge::libfuzzer_merge::Config),
#[serde(alias = "libfuzzer_coverage")]
LibFuzzerCoverage(coverage::libfuzzer_coverage::Config),
#[serde(alias = "generic_analysis")]
GenericAnalysis(analysis::generic::Config),
#[serde(alias = "generic_generator")]
GenericGenerator(fuzz::generator::GeneratorConfig),
#[serde(alias = "generic_supervisor")]
GenericSupervisor(fuzz::supervisor::SupervisorConfig),
#[serde(alias = "generic_merge")]
GenericMerge(merge::generic::Config),
#[serde(alias = "generic_crash_report")]
GenericReport(report::generic::Config),
}
impl Config {
pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
let json = std::fs::read_to_string(path)?;
Ok(serde_json::from_str(&json)?)
}
pub fn common(&self) -> &CommonConfig {
match self {
Config::LibFuzzerFuzz(c) => &c.common,
Config::LibFuzzerMerge(c) => &c.common,
Config::LibFuzzerReport(c) => &c.common,
Config::LibFuzzerCoverage(c) => &c.common,
Config::GenericAnalysis(c) => &c.common,
Config::GenericMerge(c) => &c.common,
Config::GenericReport(c) => &c.common,
Config::GenericSupervisor(c) => &c.common,
Config::GenericGenerator(c) => &c.common,
}
}
pub fn report_event(&self) {
let event_type = match self {
Config::LibFuzzerFuzz(_) => "libfuzzer_fuzz",
Config::LibFuzzerMerge(_) => "libfuzzer_merge",
Config::LibFuzzerReport(_) => "libfuzzer_crash_report",
Config::LibFuzzerCoverage(_) => "libfuzzer_coverage",
Config::GenericAnalysis(_) => "generic_analysis",
Config::GenericMerge(_) => "generic_merge",
Config::GenericReport(_) => "generic_crash_report",
Config::GenericSupervisor(_) => "generic_supervisor",
Config::GenericGenerator(_) => "generic_generator",
};
event!(task_start; EventData::Type = event_type);
}
pub async fn run(self) -> Result<()> {
telemetry::set_property(EventData::JobId(self.common().job_id));
telemetry::set_property(EventData::TaskId(self.common().task_id));
telemetry::set_property(EventData::MachineId(get_machine_id().await?));
info!("agent ready, dispatching task");
self.report_event();
match self {
Config::LibFuzzerFuzz(config) => {
fuzz::libfuzzer_fuzz::LibFuzzerFuzzTask::new(config)?
.start()
.await
}
Config::LibFuzzerReport(config) => {
report::libfuzzer_report::ReportTask::new(config)
.run()
.await
}
Config::LibFuzzerCoverage(config) => {
coverage::libfuzzer_coverage::CoverageTask::new(Arc::new(config))
.run()
.await
}
Config::LibFuzzerMerge(config) => merge::libfuzzer_merge::spawn(Arc::new(config)).await,
Config::GenericAnalysis(config) => analysis::generic::spawn(config).await,
Config::GenericGenerator(config) => fuzz::generator::spawn(Arc::new(config)).await,
Config::GenericSupervisor(config) => fuzz::supervisor::spawn(config).await,
Config::GenericMerge(config) => merge::generic::spawn(Arc::new(config)).await,
Config::GenericReport(config) => report::generic::ReportTask::new(&config).run().await,
}
}
}
#[derive(Debug, Deserialize, Clone, PartialEq)]
pub struct SyncedDir {
pub path: PathBuf,
pub url: BlobContainerUrl,
}
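
For reference, a hypothetical task definition that `Config::from_file` would accept for the `libfuzzer_fuzz` variant. Because of the serde `tag` and `flatten` attributes above, the `task_type` discriminator and the CommonConfig fields sit at the top level. Field shapes, in particular how SyncedDir and BlobContainerUrl deserialize, are assumptions here, and the URLs are placeholders:

{
    "task_type": "libfuzzer_fuzz",
    "target_exe": "fuzz.exe",
    "target_env": {},
    "target_options": [],
    "target_workers": 1,
    "inputs": { "path": "inputs", "url": "https://contoso.com/inputs" },
    "crashes": { "path": "crashes", "url": "https://contoso.com/crashes" },
    "job_id": "00000000-0000-0000-0000-000000000000",
    "task_id": "11111111-1111-1111-1111-111111111111"
}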

@@ -0,0 +1,252 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
//! # Coverage Task
//!
//! Computes a streaming coverage metric using Sancov-instrumented libFuzzers.
//! Reports the latest coverage rate via telemetry events and updates a remote
//! total coverage map in blob storage.
//!
//! ## Instrumentation
//!
//! Assumes the libFuzzer is instrumented with Sancov inline 8-bit counters.
//! This feature updates a global table without any PC callback. The coverage
//! scripts find and dump this table after executing the test input. For now,
//! our metric projects the counter value to a single bit, treating each table
//! entry as a flag rather than a counter.
//!
//! ## Dependencies
//!
//! This task invokes OS-specific debugger scripts to dump the coverage for
//! each input. To do this, the following must be in the `$PATH`:
//!
//! ### Linux
//! - `python3` (3.6)
//! - `gdb` (8.1)
//!
//! ### Windows
//! - `powershell.exe` (5.1)
//! - `cdb.exe` (10.0)
//!
//! Versions in parentheses have been tested.
use crate::tasks::config::SyncedDir;
use crate::tasks::coverage::{recorder::CoverageRecorder, total::TotalCoverage};
use crate::tasks::heartbeat::*;
use crate::tasks::utils::{init_dir, sync_remote_dir, SyncOperation};
use crate::tasks::{config::CommonConfig, generic::input_poller::*};
use anyhow::Result;
use async_trait::async_trait;
use futures::stream::StreamExt;
use onefuzz::fs::list_files;
use onefuzz::telemetry::Event::coverage_data;
use onefuzz::telemetry::EventData;
use reqwest::Url;
use serde::Deserialize;
use std::collections::HashMap;
use std::{
ffi::OsString,
path::{Path, PathBuf},
sync::Arc,
};
use storage_queue::Message;
use tokio::fs;
const TOTAL_COVERAGE: &str = "total.cov";
#[derive(Debug, Deserialize)]
pub struct Config {
pub target_exe: PathBuf,
pub target_env: HashMap<String, String>,
pub target_options: Vec<String>,
pub input_queue: Option<Url>,
pub readonly_inputs: Vec<SyncedDir>,
pub coverage: SyncedDir,
#[serde(flatten)]
pub common: CommonConfig,
}
/// Compute the coverage provided by one or both of:
///
/// 1. A list of seed corpus containers (one-time batch mode)
/// 2. A queue of inputs pending coverage analysis (streaming)
///
/// If `readonly_inputs` is empty and `input_queue` is absent, this task
/// will do nothing. If `input_queue` is present, then this task will poll
/// forever.
pub struct CoverageTask {
config: Arc<Config>,
poller: InputPoller<Message>,
}
impl CoverageTask {
pub fn new(config: impl Into<Arc<Config>>) -> Self {
let config = config.into();
let task_dir = PathBuf::from(config.common.task_id.to_string());
let poller_dir = task_dir.join("poller");
let poller = InputPoller::<Message>::new(poller_dir);
Self { config, poller }
}
pub async fn run(&mut self) -> Result<()> {
info!("starting libFuzzer coverage task");
init_dir(&self.config.coverage.path).await?;
verbose!(
"initialized coverage dir, path = {}",
self.config.coverage.path.display()
);
sync_remote_dir(&self.config.coverage, SyncOperation::Pull).await?;
verbose!(
"synced coverage dir, path = {}",
self.config.coverage.path.display()
);
self.process().await
}
async fn process(&mut self) -> Result<()> {
let mut processor = CoverageProcessor::new(self.config.clone()).await?;
// Update the total with the coverage from each seed corpus.
for dir in &self.config.readonly_inputs {
verbose!("recording coverage for {}", dir.path.display());
init_dir(&dir.path).await?;
sync_remote_dir(&dir, SyncOperation::Pull).await?;
self.record_corpus_coverage(&mut processor, dir).await?;
fs::remove_dir_all(&dir.path).await?;
sync_remote_dir(&self.config.coverage, SyncOperation::Push).await?;
}
info!(
"recorded coverage for {} containers in `readonly_inputs`",
self.config.readonly_inputs.len(),
);
// If a queue has been provided, poll it for new coverage.
if let Some(queue) = &self.config.input_queue {
verbose!("polling queue for new coverage");
let callback = CallbackImpl::new(queue.clone(), processor);
self.poller.run(callback).await?;
}
Ok(())
}
async fn record_corpus_coverage(
&self,
processor: &mut CoverageProcessor,
corpus_dir: &SyncedDir,
) -> Result<()> {
let mut corpus = fs::read_dir(&corpus_dir.path).await?;
while let Some(input) = corpus.next().await {
let input = match input {
Ok(input) => input,
Err(err) => {
error!("{}", err);
continue;
}
};
processor.test_input(&input.path()).await?;
}
Ok(())
}
}
pub struct CoverageProcessor {
config: Arc<Config>,
pub recorder: CoverageRecorder,
pub total: TotalCoverage,
pub module_totals: HashMap<OsString, TotalCoverage>,
heartbeat_client: Option<HeartbeatClient>,
}
impl CoverageProcessor {
pub async fn new(config: Arc<Config>) -> Result<Self> {
let heartbeat_client = config.common.init_heartbeat();
let total = TotalCoverage::new(config.coverage.path.join(TOTAL_COVERAGE));
let recorder = CoverageRecorder::new(config.clone());
let module_totals = HashMap::default();
Ok(Self {
config,
recorder,
total,
module_totals,
heartbeat_client,
})
}
async fn update_module_total(&mut self, file: &Path, data: &[u8]) -> Result<()> {
let module = file
.file_name()
.ok_or_else(|| format_err!("module must have filename"))?
.to_os_string();
verbose!("updating module info {:?}", module);
if !self.module_totals.contains_key(&module) {
let parent = &self.config.coverage.path.join("by-module");
fs::create_dir_all(parent).await?;
let module_total = parent.join(&module);
let total = TotalCoverage::new(module_total);
self.module_totals.insert(module.clone(), total);
}
self.module_totals[&module].update_bytes(data).await?;
verbose!("updated {:?}", module);
Ok(())
}
async fn collect_by_module(&mut self, path: &Path) -> Result<PathBuf> {
let files = list_files(&path).await?;
let mut sum = Vec::new();
for file in &files {
verbose!("checking {:?}", file);
let mut content = fs::read(file).await?;
self.update_module_total(file, &content).await?;
sum.append(&mut content);
}
let mut combined = path.as_os_str().to_owned();
combined.push(".cov");
fs::write(&combined, sum).await?;
Ok(combined.into())
}
pub async fn test_input(&mut self, input: &Path) -> Result<()> {
info!("processing input {:?}", input);
let new_coverage = self.recorder.record(input).await?;
let combined = self.collect_by_module(&new_coverage).await?;
self.total.update(&combined).await?;
Ok(())
}
pub async fn report_total(&self) -> Result<()> {
let info = self.total.info().await?;
event!(coverage_data; EventData::Covered = info.covered, EventData::Features = info.features, EventData::Rate = info.rate);
Ok(())
}
}
#[async_trait]
impl Processor for CoverageProcessor {
async fn process(&mut self, _url: Url, input: &Path) -> Result<()> {
self.heartbeat_client.alive();
self.test_input(input).await?;
self.report_total().await?;
sync_remote_dir(&self.config.coverage, SyncOperation::Push).await?;
Ok(())
}
}
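
Putting update_module_total and collect_by_module together, the synced coverage container ends up with roughly this layout (module and digest names are illustrative):

coverage/
    total.cov                     -- aggregate bitmap (TOTAL_COVERAGE)
    by-module/
        fuzz.exe.cov              -- running total for one instrumented module
    inputs/
        <SHA256_OF_INPUT>/
            fuzz.exe.cov          -- per-module dump recorded for one input
        <SHA256_OF_INPUT>.cov     -- concatenation written by collect_by_module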

@@ -0,0 +1,6 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod libfuzzer_coverage;
pub mod recorder;
pub mod total;

@@ -0,0 +1,155 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::{
env,
path::{Path, PathBuf},
process::Stdio,
sync::Arc,
};
use anyhow::Result;
use onefuzz::{
fs::{has_files, OwnedDir},
sha256::digest_file,
};
use tokio::{
fs,
process::{Child, Command},
};
use crate::tasks::coverage::libfuzzer_coverage::Config;
pub struct CoverageRecorder {
config: Arc<Config>,
script_dir: OwnedDir,
}
impl CoverageRecorder {
pub fn new(config: Arc<Config>) -> Self {
let script_dir =
OwnedDir::new(env::var("ONEFUZZ_TOOLS").unwrap_or_else(|_| "script".to_string()));
Self { config, script_dir }
}
/// Invoke a script to write coverage to a file.
///
/// Per module coverage is written to:
/// coverage/inputs/<SHA256_OF_INPUT>/<module_name>.cov
///
/// The `.cov` file is a binary dump of the 8-bit PC counter table.
pub async fn record(&mut self, test_input: impl AsRef<Path>) -> Result<PathBuf> {
let test_input = test_input.as_ref();
let coverage_path = {
let digest = digest_file(test_input).await?;
self.config.coverage.path.join("inputs").join(digest)
};
fs::create_dir_all(&coverage_path).await?;
let script = self.invoke_debugger_script(test_input, &coverage_path)?;
let output = script.wait_with_output().await?;
if !output.status.success() {
let err = format_err!("coverage recording failed: {}", output.status);
error!("{}", err);
error!(
"recording stderr: {}",
String::from_utf8_lossy(&output.stderr)
);
error!(
"recording stdout: {}",
String::from_utf8_lossy(&output.stdout)
);
return Err(err);
} else {
verbose!(
"recording stderr: {}",
String::from_utf8_lossy(&output.stderr)
);
verbose!(
"recording stdout: {}",
String::from_utf8_lossy(&output.stdout)
);
}
if !has_files(&coverage_path).await? {
tokio::fs::remove_dir(&coverage_path).await?;
bail!("no coverage files for input: {}", test_input.display());
}
Ok(coverage_path)
}
#[cfg(target_os = "linux")]
fn invoke_debugger_script(&self, test_input: &Path, output: &Path) -> Result<Child> {
let script_path = self
.script_dir
.path()
.join("linux")
.join("libfuzzer-coverage")
.join("coverage_cmd.py");
let mut cmd = Command::new("gdb");
cmd.arg(&self.config.target_exe)
.arg("-nh")
.arg("-batch")
.arg("-x")
.arg(script_path)
.arg("-ex")
.arg(format!(
"coverage {} {} {}",
&self.config.target_exe.to_string_lossy(),
test_input.to_string_lossy(),
output.to_string_lossy(),
))
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true);
for (k, v) in &self.config.target_env {
cmd.env(k, v);
}
let child = cmd.spawn()?;
Ok(child)
}
#[cfg(target_os = "windows")]
fn invoke_debugger_script(&self, test_input: &Path, output: &Path) -> Result<Child> {
let script_path = self
.script_dir
.path()
.join("win64")
.join("libfuzzer-coverage")
.join("DumpCounters.js");
let cdb_cmd = format!(
".scriptload {}; !dumpcounters {:?}; q",
script_path.to_string_lossy(),
output.to_string_lossy()
);
let mut cmd = Command::new("cdb.exe");
cmd.arg("-c")
.arg(cdb_cmd)
.arg(&self.config.target_exe)
.arg(test_input)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true);
for (k, v) in &self.config.target_env {
cmd.env(k, v);
}
let child = cmd.spawn()?;
Ok(child)
}
}
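
On Linux, the spawn above amounts to a debugger invocation along these lines (paths illustrative, with ONEFUZZ_TOOLS unset so script_dir defaults to "script"):

gdb ./fuzz.exe -nh -batch \
    -x script/linux/libfuzzer-coverage/coverage_cmd.py \
    -ex "coverage ./fuzz.exe ./crash-input ./coverage/inputs/<sha256>"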

@@ -0,0 +1,92 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::path::{Path, PathBuf};
use anyhow::Result;
use tokio::{fs, io};
pub struct TotalCoverage {
/// Absolute path to the total coverage file.
///
/// May not yet exist on disk.
path: PathBuf,
}
#[derive(Debug)]
pub struct Info {
pub covered: u64,
pub features: u64,
pub rate: f64,
}
impl TotalCoverage {
pub fn new(path: PathBuf) -> Self {
Self { path }
}
pub async fn data(&self) -> Result<Option<Vec<u8>>> {
use io::ErrorKind::NotFound;
let data = fs::read(&self.path).await;
if let Err(err) = &data {
if err.kind() == NotFound {
return Ok(None);
}
}
Ok(Some(data?))
}
pub fn path(&self) -> &Path {
&self.path
}
pub async fn update_bytes(&self, new_data: &[u8]) -> Result<()> {
match self.data().await {
Ok(Some(mut total_data)) => {
for (i, b) in new_data.iter().enumerate() {
if *b > 0 {
total_data[i] = 1;
}
}
fs::write(self.path(), total_data).await?;
}
Ok(None) => {
// Base case: we don't yet have any total coverage. Promote the
// new coverage to being our total coverage.
info!("initializing total coverage map {}", self.path().display());
fs::write(self.path(), new_data).await?;
}
Err(err) => {
// Couldn't read total for some other reason, so this is a real error.
return Err(err);
}
}
Ok(())
}
pub async fn update(&self, new: impl AsRef<Path>) -> Result<()> {
let new_data = fs::read(new).await?;
self.update_bytes(&new_data).await
}
pub async fn info(&self) -> Result<Info> {
let data = self
.data()
.await?
.ok_or_else(|| format_err!("coverage file not found"))?;
let covered = data.iter().filter(|&&c| c > 0).count() as u64;
let features = data.len() as u64;
let rate = (covered as f64) / (features as f64);
Ok(Info {
covered,
features,
rate,
})
}
}
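
A self-contained sketch of the arithmetic above: counters project to single bits on merge, and `info` reports the fraction of nonzero entries. Note that `update_bytes` indexes the existing total by the new data's positions, so it implicitly assumes both maps have the same length:

fn merge(total: &mut [u8], new_data: &[u8]) {
    for (i, b) in new_data.iter().enumerate() {
        if *b > 0 {
            total[i] = 1;
        }
    }
}

fn main() {
    let mut total = vec![0u8, 1, 0, 0];
    merge(&mut total, &[9, 0, 0, 2]); // counter values 9 and 2 become flags
    assert_eq!(total, vec![1, 1, 0, 1]);
    // info() over this map: covered = 3, features = 4, rate = 0.75
}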

@@ -0,0 +1,239 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
heartbeat::*,
utils,
};
use anyhow::{Error, Result};
use futures::stream::StreamExt;
use onefuzz::{expand::Expand, fs::set_executable, input_tester::Tester, sha256};
use serde::Deserialize;
use std::collections::HashMap;
use std::{
ffi::OsString,
path::{Path, PathBuf},
process::Stdio,
sync::Arc,
};
use tokio::{fs, process::Command};
fn default_bool_true() -> bool {
true
}
#[derive(Debug, Deserialize, Clone)]
pub struct GeneratorConfig {
pub generator_exe: String,
pub generator_env: HashMap<String, String>,
pub generator_options: Vec<String>,
pub readonly_inputs: Vec<SyncedDir>,
pub crashes: SyncedDir,
pub tools: SyncedDir,
pub target_exe: PathBuf,
pub target_env: HashMap<String, String>,
pub target_options: Vec<String>,
pub target_timeout: Option<u64>,
#[serde(default)]
pub check_asan_log: bool,
#[serde(default = "default_bool_true")]
pub check_debugger: bool,
#[serde(default)]
pub check_retry_count: u64,
pub rename_output: bool,
#[serde(flatten)]
pub common: CommonConfig,
}
pub async fn spawn(config: Arc<GeneratorConfig>) -> Result<(), Error> {
utils::init_dir(&config.crashes.path).await?;
utils::init_dir(&config.tools.path).await?;
utils::sync_remote_dir(&config.tools, utils::SyncOperation::Pull).await?;
set_executable(&config.tools.path).await?;
let hb_client = config.common.init_heartbeat();
for sync_dir in &config.readonly_inputs {
utils::init_dir(&sync_dir.path).await?;
utils::sync_remote_dir(&sync_dir, utils::SyncOperation::Pull).await?;
}
let resync = resync_corpuses(
config.readonly_inputs.clone(),
std::time::Duration::from_secs(10),
);
let crash_dir_monitor = utils::monitor_result_dir(config.crashes.clone());
let tester = Tester::new(
&config.target_exe,
&config.target_options,
&config.target_env,
&config.target_timeout,
config.check_asan_log,
config.check_debugger,
config.check_retry_count,
);
let inputs: Vec<_> = config.readonly_inputs.iter().map(|x| &x.path).collect();
let fuzzing_monitor = start_fuzzing(&config, inputs, tester, hb_client);
futures::try_join!(fuzzing_monitor, resync, crash_dir_monitor)?;
Ok(())
}
async fn generate_input(
generator_exe: &str,
generator_env: &HashMap<String, String>,
generator_options: &[String],
tools_dir: impl AsRef<Path>,
corpus_dir: impl AsRef<Path>,
output_dir: impl AsRef<Path>,
) -> Result<()> {
let mut expand = Expand::new();
expand
.generated_inputs(&output_dir)
.input_corpus(&corpus_dir)
.generator_exe(&generator_exe)
.generator_options(&generator_options)
.tools_dir(&tools_dir);
utils::reset_tmp_dir(&output_dir).await?;
let generator_path = Expand::new()
.tools_dir(tools_dir.as_ref())
.evaluate_value(generator_exe)?;
let mut generator = Command::new(&generator_path);
generator
.kill_on_drop(true)
.env_remove("RUST_LOG")
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::piped());
for arg in expand.evaluate(generator_options)? {
generator.arg(arg);
}
for (k, v) in generator_env {
generator.env(k, expand.evaluate_value(v)?);
}
info!("Generating test cases with {:?}", generator);
let output = generator.spawn()?.wait_with_output().await?;
info!("Test case generation result {:?}", output);
Ok(())
}
async fn start_fuzzing<'a>(
config: &GeneratorConfig,
corpus_dirs: Vec<impl AsRef<Path>>,
tester: Tester<'a>,
heartbeat_sender: Option<HeartbeatClient>,
) -> Result<()> {
let generator_tmp = "generator_tmp";
info!("Starting generator fuzzing loop");
loop {
heartbeat_sender.alive();
for corpus_dir in &corpus_dirs {
let corpus_dir = corpus_dir.as_ref();
generate_input(
&config.generator_exe,
&config.generator_env,
&config.generator_options,
&config.tools.path,
corpus_dir,
generator_tmp,
)
.await?;
let mut read_dir = fs::read_dir(generator_tmp).await?;
while let Some(file) = read_dir.next().await {
verbose!("Processing file {:?}", file);
let file = file?;
let destination_file = if config.rename_output {
let hash = sha256::digest_file(file.path()).await?;
OsString::from(hash)
} else {
file.file_name()
};
let destination_file = config.crashes.path.join(destination_file);
if tester.is_crash(file.path()).await? {
info!("Crash found, path = {}", file.path().display());
if let Err(err) = fs::rename(file.path(), &destination_file).await {
warn!("Unable to move file {:?} : {:?}", file.path(), err);
}
}
}
verbose!(
"Tested generated inputs for corpus = {}",
corpus_dir.display()
);
}
}
}
pub async fn resync_corpuses(dirs: Vec<SyncedDir>, delay: std::time::Duration) -> Result<()> {
loop {
for sync_dir in &dirs {
utils::sync_remote_dir(sync_dir, utils::SyncOperation::Pull)
.await
.ok();
}
tokio::time::delay_for(delay).await;
}
}
#[cfg(test)]
mod tests {
#[tokio::test]
#[cfg(target_os = "linux")]
#[ignore]
async fn test_radamsa_linux() {
use super::*;
use std::env;
let radamsa_path = env::var("ONEFUZZ_TEST_RADAMSA_LINUX").unwrap();
let corpus_dir_temp = tempfile::tempdir().unwrap();
let corpus_dir = corpus_dir_temp.into_path();
let seed_file_name = corpus_dir.clone().join("seed.txt");
let radamsa_output_temp = tempfile::tempdir().unwrap();
let radamsa_output = radamsa_output_temp.into_path();
let generator_options: Vec<String> = vec![
"-o",
"{generated_inputs}/input-%n-%s",
"-n",
"100",
"-r",
"{input_corpus}",
]
.iter()
.map(|p| p.to_string())
.collect();
let radamsa_as_path = Path::new(&radamsa_path);
let radamsa_dir = radamsa_as_path.parent().unwrap();
let radamsa_exe = String::from("{tools_dir}/radamsa");
let radamsa_env = HashMap::new();
tokio::fs::write(seed_file_name, "test").await.unwrap();
let _output = generate_input(
&radamsa_exe,
&radamsa_env,
&generator_options,
&radamsa_dir,
corpus_dir,
radamsa_output.clone(),
)
.await;
let generated_outputs = std::fs::read_dir(radamsa_output.clone()).unwrap();
assert_eq!(generated_outputs.count(), 100, "expected radamsa to generate 100 inputs");
}
}
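
The `{generated_inputs}`, `{input_corpus}`, and `{tools_dir}` strings above are template variables resolved by onefuzz::expand::Expand. A simplified stand-in showing the substitution idea only; this is not the real implementation, which supports many more variables:

use std::collections::HashMap;

fn expand(template: &str, vars: &HashMap<&str, String>) -> String {
    let mut out = template.to_string();
    for (key, value) in vars {
        // "{tools_dir}" and friends are replaced with their configured values
        out = out.replace(&format!("{{{}}}", key), value);
    }
    out
}

fn main() {
    let mut vars = HashMap::new();
    vars.insert("tools_dir", "/onefuzz/tools".to_string());
    assert_eq!(expand("{tools_dir}/radamsa", &vars), "/onefuzz/tools/radamsa");
}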

@@ -0,0 +1,375 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
heartbeat::HeartbeatSender,
utils,
};
use anyhow::Result;
use futures::{future::try_join_all, stream::StreamExt};
use onefuzz::{
libfuzzer::{LibFuzzer, LibFuzzerLine},
monitor::DirectoryMonitor,
system,
telemetry::{
Event::{new_coverage, new_result, process_stats, runtime_stats},
EventData,
},
uploader::BlobUploader,
};
use serde::Deserialize;
use std::{collections::HashMap, path::PathBuf, process::ExitStatus};
use tokio::{
io,
sync::mpsc,
task,
time::{self, Duration},
};
use uuid::Uuid;
// Time between resync of all corpus container directories.
const RESYNC_PERIOD: Duration = Duration::from_secs(30);
// Delay to allow for observation of CPU usage when reporting proc info.
const PROC_INFO_COLLECTION_DELAY: Duration = Duration::from_secs(1);
// Period of reporting proc info about running processes.
const PROC_INFO_PERIOD: Duration = Duration::from_secs(30);
// Period of reporting fuzzer-generated runtime stats.
const RUNTIME_STATS_PERIOD: Duration = Duration::from_secs(60);
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
pub inputs: SyncedDir,
pub readonly_inputs: Option<Vec<SyncedDir>>,
pub crashes: SyncedDir,
pub target_exe: PathBuf,
pub target_env: HashMap<String, String>,
pub target_options: Vec<String>,
pub target_workers: Option<u64>,
#[serde(flatten)]
pub common: CommonConfig,
}
pub struct LibFuzzerFuzzTask {
config: Config,
}
impl LibFuzzerFuzzTask {
pub fn new(config: Config) -> Result<Self> {
Ok(Self { config })
}
pub async fn start(&self) -> Result<()> {
let workers = self.config.target_workers.unwrap_or_else(|| {
let cpus = num_cpus::get() as u64;
u64::max(1, cpus - 1)
});
self.init_directories().await?;
self.sync_all_corpuses().await?;
let hb_client = self.config.common.init_heartbeat();
// To be scheduled.
let resync = self.resync_all_corpuses();
let new_corpus = self.monitor_new_corpus();
let faults = self.monitor_faults();
let (stats_sender, stats_receiver) = mpsc::unbounded_channel();
let report_stats = report_runtime_stats(workers as usize, stats_receiver, hb_client);
let fuzzers: Vec<_> = (0..workers)
.map(|id| self.start_fuzzer_monitor(id, stats_sender.clone()))
.collect();
let fuzzers = try_join_all(fuzzers);
futures::try_join!(resync, new_corpus, faults, fuzzers, report_stats)?;
Ok(())
}
// The fuzzer monitor coordinates a _series_ of fuzzer runs.
//
// A run is one session of continuous fuzzing, terminated by a fuzzing error
// or discovered fault. The monitor restarts the libFuzzer when it exits.
async fn start_fuzzer_monitor(&self, worker_id: u64, stats_sender: StatsSender) -> Result<()> {
loop {
let run = self.run_fuzzer(worker_id, stats_sender.clone());
if let Err(err) = run.await {
error!("Fuzzer run failed: {}", err);
}
}
}
// Fuzz with a libFuzzer until it exits.
//
// While it runs, parse stderr for progress metrics, and report them.
async fn run_fuzzer(&self, worker_id: u64, stats_sender: StatsSender) -> Result<ExitStatus> {
use io::AsyncBufReadExt;
let run_id = Uuid::new_v4();
info!("starting fuzzer run, run_id = {}", run_id);
let inputs: Vec<_> = {
if let Some(readonly_inputs) = &self.config.readonly_inputs {
readonly_inputs.iter().map(|d| &d.path).collect()
} else {
vec![]
}
};
let fuzzer = LibFuzzer::new(
&self.config.target_exe,
&self.config.target_options,
&self.config.target_env,
);
let mut running =
fuzzer.fuzz(&self.config.crashes.path, &self.config.inputs.path, &inputs)?;
let sys_info = task::spawn(report_fuzzer_sys_info(worker_id, run_id, running.id()));
// Splitting borrow.
let stderr = running
.stderr
.as_mut()
.ok_or_else(|| format_err!("stderr not captured"))?;
let stderr = io::BufReader::new(stderr);
stderr
.lines()
.for_each(|line| {
let stats_sender = stats_sender.clone();
async move {
let line = line.map_err(|e| e.into());
if let Err(err) = try_report_iter_update(stats_sender, worker_id, run_id, line)
{
error!("could not parse fuzzing iteration update: {}", err);
}
}
})
.await;
let (exit_status, _) = tokio::join!(running, sys_info);
Ok(exit_status?)
}
async fn init_directories(&self) -> Result<()> {
utils::init_dir(&self.config.inputs.path).await?;
utils::init_dir(&self.config.crashes.path).await?;
if let Some(readonly_inputs) = &self.config.readonly_inputs {
for dir in readonly_inputs {
utils::init_dir(&dir.path).await?;
}
}
Ok(())
}
async fn sync_all_corpuses(&self) -> Result<()> {
utils::sync_remote_dir(&self.config.inputs, utils::SyncOperation::Pull).await?;
if let Some(readonly_inputs) = &self.config.readonly_inputs {
for corpus in readonly_inputs {
utils::sync_remote_dir(corpus, utils::SyncOperation::Pull).await?;
}
}
Ok(())
}
async fn resync_all_corpuses(&self) -> Result<()> {
loop {
time::delay_for(RESYNC_PERIOD).await;
self.sync_all_corpuses().await?;
}
}
async fn monitor_new_corpus(&self) -> Result<()> {
let url = self.config.inputs.url.url();
let dir = self.config.inputs.path.clone();
let mut monitor = DirectoryMonitor::new(dir);
monitor.start()?;
monitor
.for_each(move |item| {
let url = url.clone();
async move {
event!(new_coverage; EventData::Path = item.display().to_string());
let mut uploader = BlobUploader::new(url);
if let Err(err) = uploader.upload(item.clone()).await {
error!("Couldn't upload coverage: {}", err);
}
}
})
.await;
Ok(())
}
async fn monitor_faults(&self) -> Result<()> {
let url = self.config.crashes.url.url();
let dir = self.config.crashes.path.clone();
let mut monitor = DirectoryMonitor::new(dir);
monitor.start()?;
monitor
.for_each(move |item| {
let url = url.clone();
async move {
event!(new_result; EventData::Path = item.display().to_string());
let mut uploader = BlobUploader::new(url);
if let Err(err) = uploader.upload(item.clone()).await {
error!("Couldn't upload fault: {}", err);
}
}
})
.await;
Ok(())
}
}
fn try_report_iter_update(
stats_sender: StatsSender,
worker_id: u64,
run_id: Uuid,
line: Result<String>,
) -> Result<()> {
let line = line?;
let line = LibFuzzerLine::parse(line)?;
if let Some(line) = line {
stats_sender.send(RuntimeStats {
worker_id,
run_id,
count: line.iters(),
execs_sec: line.execs_sec(),
})?;
}
Ok(())
}
async fn report_fuzzer_sys_info(worker_id: u64, run_id: Uuid, fuzzer_pid: u32) -> Result<()> {
loop {
system::refresh()?;
// Allow for sampling CPU usage.
time::delay_for(PROC_INFO_COLLECTION_DELAY).await;
if let Some(proc_info) = system::proc_info(fuzzer_pid)? {
event!(process_stats;
EventData::WorkerId = worker_id,
EventData::RunId = run_id,
EventData::Name = proc_info.name,
EventData::Pid = proc_info.pid,
EventData::ProcessStatus = proc_info.status,
EventData::CpuUsage = proc_info.cpu_usage,
EventData::PhysicalMemory = proc_info.memory_kb,
EventData::VirtualMemory = proc_info.virtual_memory_kb
);
} else {
// The process no longer exists.
break;
}
time::delay_for(PROC_INFO_PERIOD).await;
}
Ok(())
}
#[derive(Clone, Copy, Debug)]
pub struct RuntimeStats {
worker_id: u64,
run_id: Uuid,
count: u64,
execs_sec: f64,
}
impl RuntimeStats {
pub fn report(&self) {
event!(
runtime_stats;
EventData::WorkerId = self.worker_id,
EventData::RunId = self.run_id,
EventData::Count = self.count,
EventData::ExecsSecond = self.execs_sec
);
}
}
type StatsSender = mpsc::UnboundedSender<RuntimeStats>;
#[derive(Clone, Copy, Debug)]
struct Timer {
interval: Duration,
}
impl Timer {
pub fn new(interval: Duration) -> Self {
Self { interval }
}
async fn wait(&self) {
time::delay_for(self.interval).await;
}
}
// Report runtime stats, as delivered via the `stats` channel, with a periodic trigger to
// guarantee a minimum reporting frequency.
//
// The minimum frequency is to aid metric visualization. The libFuzzer binary runtime's `pulse`
// event is triggered by a doubling of the last (locally) logged total iteration count. For long-
// running worker runs, this can result in misleading gaps and binning artifacts. In effect, we
// are approximating nearest-neighbor interpolation on the runtime stats time series.
async fn report_runtime_stats(
workers: usize,
mut stats_channel: mpsc::UnboundedReceiver<RuntimeStats>,
heartbeat_sender: impl HeartbeatSender,
) -> Result<()> {
// Cache the last-reported stats for a given worker.
//
// When logging stats, the most recently reported runtime stats will be used for any
// missing data. For time-triggered logging, it will be used for all workers.
let mut last_reported: Vec<Option<RuntimeStats>> =
std::iter::repeat(None).take(workers).collect();
let timer = Timer::new(RUNTIME_STATS_PERIOD);
loop {
tokio::select! {
Some(stats) = stats_channel.next() => {
heartbeat_sender.alive();
stats.report();
let idx = stats.worker_id as usize;
last_reported[idx] = Some(stats);
}
_ = timer.wait() => {
for stats in &last_reported {
if let Some(stats) = stats {
stats.report();
}
}
}
};
}
}
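
For context, the stderr lines that LibFuzzerLine::parse consumes are libFuzzer's standard progress output, roughly of this shape (values illustrative):

#65536  pulse  cov: 1234 ft: 2345 corp: 56/789b exec/s: 32768 rss: 123Mb

from which try_report_iter_update above would derive count = 65536 and execs_sec = 32768.0.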

@@ -0,0 +1,6 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod generator;
pub mod libfuzzer_fuzz;
pub mod supervisor;

@@ -0,0 +1,330 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#![allow(clippy::too_many_arguments)]
use crate::tasks::{
config::{CommonConfig, ContainerType, SyncedDir},
heartbeat::*,
stats::common::{monitor_stats, StatsFormat},
utils::{self, CheckNotify},
};
use anyhow::{Error, Result};
use appinsights::telemetry::SeverityLevel;
use onefuzz::{
expand::Expand,
fs::{has_files, set_executable, OwnedDir},
};
use serde::Deserialize;
use std::{
collections::HashMap,
path::{Path, PathBuf},
process::Stdio,
time::Duration,
};
use tokio::{
process::{Child, Command},
sync::Notify,
};
#[derive(Debug, Deserialize)]
pub struct SupervisorConfig {
pub inputs: SyncedDir,
pub crashes: SyncedDir,
pub supervisor_exe: String,
pub supervisor_env: HashMap<String, String>,
pub supervisor_options: Vec<String>,
pub supervisor_input_marker: Option<String>,
pub target_exe: PathBuf,
pub target_options: Vec<String>,
pub tools: SyncedDir,
pub wait_for_files: Option<ContainerType>,
pub stats_file: Option<String>,
pub stats_format: Option<StatsFormat>,
#[serde(flatten)]
pub common: CommonConfig,
}
const HEARTBEAT_PERIOD: Duration = Duration::from_secs(60);
pub async fn spawn(config: SupervisorConfig) -> Result<(), Error> {
let runtime_dir = OwnedDir::new(config.common.task_id.to_string());
runtime_dir.create_if_missing().await?;
utils::init_dir(&config.tools.path).await?;
utils::sync_remote_dir(&config.tools, utils::SyncOperation::Pull).await?;
set_executable(&config.tools.path).await?;
let supervisor_path = Expand::new()
.tools_dir(&config.tools.path)
.evaluate_value(&config.supervisor_exe)?;
let crashes = SyncedDir {
path: runtime_dir.path().join("crashes"),
url: config.crashes.url.clone(),
};
utils::init_dir(&crashes.path).await?;
let monitor_crashes = utils::monitor_result_dir(crashes.clone());
let inputs = SyncedDir {
path: runtime_dir.path().join("inputs"),
url: config.inputs.url.clone(),
};
utils::init_dir(&inputs.path).await?;
verbose!("initialized {}", inputs.path.display());
let sync_inputs = resync_corpus(inputs.clone());
if let Some(context) = &config.wait_for_files {
let dir = match context {
ContainerType::Inputs => &inputs,
};
let delay = std::time::Duration::from_secs(10);
loop {
utils::sync_remote_dir(dir, utils::SyncOperation::Pull).await?;
if has_files(&dir.path).await? {
break;
}
tokio::time::delay_for(delay).await;
}
}
let process = start_supervisor(
&runtime_dir.path(),
&supervisor_path,
&config.target_exe,
&crashes.path,
&inputs.path,
&config.target_options,
&config.supervisor_options,
&config.supervisor_env,
&config.supervisor_input_marker,
)
.await?;
let stopped = Notify::new();
let monitor_process = monitor_process(process, &stopped);
let hb = config.common.init_heartbeat();
let heartbeat_process = heartbeat_process(&stopped, hb);
let monitor_path = if let Some(stats_file) = &config.stats_file {
Some(
Expand::new()
.runtime_dir(runtime_dir.path())
.evaluate_value(stats_file)?,
)
} else {
verbose!("no stats file to monitor");
None
};
let monitor_stats = monitor_stats(monitor_path, config.stats_format);
futures::try_join!(
heartbeat_process,
monitor_process,
monitor_stats,
monitor_crashes,
sync_inputs,
)?;
Ok(())
}
async fn heartbeat_process(
stopped: &Notify,
heartbeat_client: Option<HeartbeatClient>,
) -> Result<()> {
while !stopped.is_notified(HEARTBEAT_PERIOD).await {
heartbeat_client.alive();
}
Ok(())
}
async fn monitor_process(process: tokio::process::Child, stopped: &Notify) -> Result<()> {
verbose!("waiting for child output...");
let output: std::process::Output = process.wait_with_output().await?;
verbose!("child exited with {:?}", output.status);
if output.status.success() {
verbose!("child status is success, notifying");
stopped.notify();
Ok(())
} else {
let err_text = String::from_utf8_lossy(&output.stderr);
let output_text = String::from_utf8_lossy(&output.stdout);
let message = format!("{} {}", err_text, output_text);
error!("{}", message);
stopped.notify();
Err(Error::msg(message))
}
}
async fn start_supervisor(
runtime_dir: impl AsRef<Path>,
supervisor_path: impl AsRef<Path>,
target_exe: impl AsRef<Path>,
fault_dir: impl AsRef<Path>,
inputs_dir: impl AsRef<Path>,
target_options: &[String],
supervisor_options: &[String],
supervisor_env: &HashMap<String, String>,
supervisor_input_marker: &Option<String>,
) -> Result<Child> {
let mut cmd = Command::new(supervisor_path.as_ref());
let cmd = cmd
.kill_on_drop(true)
.env_remove("RUST_LOG")
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let mut expand = Expand::new();
expand
.supervisor_exe(supervisor_path)
.supervisor_options(supervisor_options)
.crashes(fault_dir)
.runtime_dir(runtime_dir)
.target_exe(target_exe)
.target_options(target_options)
.input_corpus(inputs_dir);
if let Some(input_marker) = supervisor_input_marker {
expand.input(input_marker);
}
let args = expand.evaluate(supervisor_options)?;
cmd.args(&args);
for (k, v) in supervisor_env {
cmd.env(k, expand.evaluate_value(v)?);
}
info!("starting supervisor '{:?}'", cmd);
let child = cmd.spawn()?;
Ok(child)
}
pub async fn resync_corpus(sync_dir: SyncedDir) -> Result<()> {
let delay = std::time::Duration::from_secs(10);
loop {
let result = utils::sync_remote_dir(&sync_dir, utils::SyncOperation::Pull).await;
if result.is_err() {
warn!("error syncing dir: {:?}", sync_dir);
}
tokio::time::delay_for(delay).await;
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tasks::stats::afl::read_stats;
use onefuzz::telemetry::EventData;
use std::collections::HashMap;
use std::time::Instant;
const MAX_FUZZ_TIME_SECONDS: u64 = 120;
async fn has_stats(path: &PathBuf) -> bool {
if let Ok(stats) = read_stats(path).await {
for entry in stats {
if matches!(entry, EventData::ExecsSecond(x) if x > 0.0) {
return true;
}
}
false
} else {
false
}
}
#[tokio::test]
#[cfg(target_os = "linux")]
#[cfg_attr(not(feature = "integration_test"), ignore)]
async fn test_fuzzer_linux() {
use std::env;
let runtime_dir = tempfile::tempdir().unwrap();
let afl_fuzz_exe = if let Ok(x) = env::var("ONEFUZZ_TEST_AFL_LINUX_FUZZER") {
x
} else {
warn!("Unable to test AFL integration");
return;
};
let afl_test_binary = if let Ok(x) = env::var("ONEFUZZ_TEST_AFL_LINUX_TEST_BINARY") {
x
} else {
warn!("Unable to test AFL integration");
return;
};
let fault_dir_temp = tempfile::tempdir().unwrap();
let fault_dir = fault_dir_temp.path();
let corpus_dir_temp = tempfile::tempdir().unwrap();
let corpus_dir = corpus_dir_temp.into_path();
let seed_file_name = corpus_dir.clone().join("seed.txt");
let target_options = vec!["{input}".to_owned()];
let supervisor_env = HashMap::new();
let supervisor_options: Vec<_> = vec![
"-d",
"-i",
"{input_corpus}",
"-o",
"{crashes}",
"--",
"{target_exe}",
"{target_options}",
]
.iter()
.map(|p| p.to_string())
.collect();
// AFL input marker
let supervisor_input_marker = Some("@@".to_owned());
println!(
"testing 2: corpus_dir {:?} -- fault_dir {:?} -- seed_file_name {:?}",
corpus_dir, fault_dir, seed_file_name
);
tokio::fs::write(seed_file_name, "xyz").await.unwrap();
let process = start_supervisor(
runtime_dir,
PathBuf::from(afl_fuzz_exe),
PathBuf::from(afl_test_binary),
fault_dir.clone(),
corpus_dir,
&target_options,
&supervisor_options,
&supervisor_env,
&supervisor_input_marker,
)
.await
.unwrap();
let notify = Notify::new();
let _fuzzing_monitor = monitor_process(process, &notify);
let stat_output = fault_dir.join("fuzzer_stats");
let start = Instant::now();
loop {
if has_stats(&stat_output).await {
break;
}
if start.elapsed().as_secs() > MAX_FUZZ_TIME_SECONDS {
panic!(
"afl did not generate stats in {} seconds",
MAX_FUZZ_TIME_SECONDS
);
}
tokio::time::delay_for(std::time::Duration::from_secs(1)).await;
}
}
}
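
Tracing the test's templates through start_supervisor: expand.input substitutes the AFL marker `@@` for `{input}` inside `{target_options}`, so the spawned command comes out roughly as (paths illustrative):

afl-fuzz -d -i <input_corpus> -o <crashes> -- <target_exe> @@

which lines up with the command_line recorded in the fuzzer_stats fixture earlier in this commit.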

@@ -0,0 +1,293 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::{fmt, path::PathBuf};
use anyhow::Result;
use futures::stream::StreamExt;
use onefuzz::blob::BlobUrl;
use onefuzz::fs::OwnedDir;
use reqwest::Url;
use tokio::{
fs,
time::{self, Duration},
};
use crate::tasks::{config::SyncedDir, utils};
mod callback;
pub use callback::*;
const POLL_INTERVAL: Duration = Duration::from_secs(10);
#[cfg(test)]
mod tests;
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum State<M> {
Ready,
Polled(Option<M>),
Parsed(M, Url),
Downloaded(M, Url, PathBuf),
Processed(M),
}
impl<M> fmt::Display for State<M> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
State::Ready => write!(f, "Ready")?,
State::Polled(..) => write!(f, "Polled")?,
State::Parsed(..) => write!(f, "Parsed")?,
State::Downloaded(..) => write!(f, "Downloaded")?,
State::Processed(..) => write!(f, "Processed")?,
}
Ok(())
}
}
pub enum Event<'a, M> {
Poll(&'a mut dyn Queue<M>),
Parse(&'a mut dyn Parser<M>),
Download(&'a mut dyn Downloader),
Process(&'a mut dyn Processor),
Finish(&'a mut dyn Queue<M>),
}
impl<'a, M> fmt::Display for Event<'a, M> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Event::Poll(..) => write!(f, "Poll")?,
Event::Parse(..) => write!(f, "Parse")?,
Event::Download(..) => write!(f, "Download")?,
Event::Process(..) => write!(f, "Process")?,
Event::Finish(..) => write!(f, "Finish")?,
}
Ok(())
}
}
impl<'a, M> fmt::Debug for Event<'a, M> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self)
}
}
/// State machine that tries to poll a queue for new messages, parse a test
/// input URL from each message, download the test input, then process it.
///
/// The implementations of the transition actions are provided by impls of
/// callback traits.
///
/// Generic in the type `M` of the queue message. We assume `M` carries both
/// application data (here, the input URL, in some encoding) and metadata for
/// operations like finalizing a dequeue with a pop receipt.
pub struct InputPoller<M> {
/// Agent-local directory where the poller will download inputs.
/// Will be reset for each new input.
working_dir: OwnedDir,
/// Internal automaton state.
///
/// This is only nullable so we can internally `take()` the current state
/// when scrutinizing it in the `trigger()` method.
state: Option<State<M>>,
batch_dir: Option<SyncedDir>,
}
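// Transition summary (as implemented by `next()` and `try_trigger()` below):
//
// Ready -> Poll -> Polled(Option<M>)
// Polled(Some(msg)) -> Parse -> Parsed(msg, url)
// Polled(None) -> Parse -> Ready
// Parsed(msg, url) -> Download -> Downloaded(msg, url, input),
// or directly to Processed(msg) if the URL was already seen in batch mode
// Downloaded(msg, url, input) -> Process -> Processed(msg)
// Processed(msg) -> Finish -> Ready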
impl<M> InputPoller<M> {
pub fn new(working_dir: impl Into<PathBuf>) -> Self {
let working_dir = OwnedDir::new(working_dir);
let state = Some(State::Ready);
Self {
state,
working_dir,
batch_dir: None,
}
}
/// Process a given SyncedDir in batch
pub async fn batch_process(
&mut self,
processor: &mut dyn Processor,
to_process: &SyncedDir,
) -> Result<()> {
self.batch_dir = Some(to_process.clone());
utils::init_dir(&to_process.path).await?;
utils::sync_remote_dir(&to_process, utils::SyncOperation::Pull).await?;
let mut read_dir = fs::read_dir(&to_process.path).await?;
while let Some(file) = read_dir.next().await {
verbose!("Processing batch-downloaded input {:?}", file);
let file = file?;
let path = file.path();
// Compute the file name relative to the synced directory, and thus the
// container.
let blob_name = {
let dir_path = to_process.path.canonicalize()?;
let input_path = path.canonicalize()?;
let dir_relative = input_path.strip_prefix(&dir_path)?;
dir_relative.display().to_string()
};
let url = to_process.url.blob(blob_name).url();
processor.process(url, &path).await?;
}
Ok(())
}
/// Check if an input was already processed via batch-processing its container.
pub async fn seen_in_batch(&self, url: &Url) -> Result<bool> {
let result = if let Some(batch_dir) = &self.batch_dir {
if let Ok(blob) = BlobUrl::new(url.clone()) {
batch_dir.url.account() == blob.account()
&& batch_dir.url.container() == blob.container()
&& batch_dir.path.join(blob.name()).exists()
} else {
false
}
} else {
false
};
Ok(result)
}
/// Path to the working directory.
///
/// We will create or reset the working directory before entering the
/// `Downloaded` state, but a caller cannot otherwise assume it exists.
#[allow(unused)]
pub fn working_dir(&self) -> &OwnedDir {
&self.working_dir
}
/// Get the current automaton state, including the state data.
pub fn state(&self) -> &State<M> {
self.state.as_ref().unwrap_or_else(|| unreachable!())
}
fn set_state(&mut self, state: impl Into<Option<State<M>>>) {
self.state = state.into();
}
pub async fn run(&mut self, mut cb: impl Callback<M>) -> Result<()> {
loop {
match self.state() {
State::Polled(None) => {
verbose!("Input queue empty, sleeping");
time::delay_for(POLL_INTERVAL).await;
}
State::Downloaded(_msg, _url, input) => {
info!("Processing downloaded input: {:?}", input);
}
_ => {}
}
self.next(&mut cb).await?;
}
}
/// Transition to the next state in the poll loop, using `cb` to implement
/// the transition actions.
pub async fn next(&mut self, cb: &mut impl Callback<M>) -> Result<()> {
use Event::*;
use State::*;
match self.state() {
Ready => self.trigger(Poll(cb.queue())).await?,
Polled(..) => self.trigger(Parse(cb.parser())).await?,
Parsed(..) => self.trigger(Download(cb.downloader())).await?,
Downloaded(..) => self.trigger(Process(cb.processor())).await?,
Processed(..) => self.trigger(Finish(cb.queue())).await?,
}
Ok(())
}
/// Trigger a state transition event, and execute the action for each valid
/// transition.
///
/// The `Event` itself contains any callback functions and data needed to
/// concretely implement the transition action.
pub async fn trigger(&mut self, event: Event<'_, M>) -> Result<()> {
// Take ownership of the current state so we can move its data out
// of the variant.
//
// Invariant: `self.state.is_some()` on function entry.
//
// This local now represents the current state, and we must not call
// any other method on `self` that assumes `self.state.is_some()`.
let state = self.state.take().unwrap();
let result = self.try_trigger(state, event).await;
if result.is_err() {
// We must maintain a valid state, and we can logically recover from
// any failed action or invalid transition.
self.state = Some(State::Ready);
}
// Check that we always have a defined internal state.
assert!(self.state.is_some());
result
}
async fn try_trigger(&mut self, state: State<M>, event: Event<'_, M>) -> Result<()> {
use Event::*;
use State::*;
match (state, event) {
(Ready, Poll(queue)) => {
let msg = queue.pop().await?;
self.set_state(Polled(msg));
}
(Polled(msg), Parse(parser)) => {
if let Some(msg) = msg {
let url = parser.parse(&msg)?;
self.set_state(Parsed(msg, url));
} else {
self.set_state(Ready);
}
}
(Parsed(msg, url), Download(downloader)) => {
self.working_dir.reset().await?;
if self.seen_in_batch(&url).await? {
verbose!("url was seen during batch processing: {:?}", url);
self.set_state(Processed(msg));
} else {
let input = downloader
.download(url.clone(), self.working_dir.path())
.await?;
self.set_state(Downloaded(msg, url, input));
}
}
(Downloaded(msg, url, input), Process(processor)) => {
processor.process(url, &input).await?;
self.set_state(Processed(msg));
}
(Processed(msg), Finish(queue)) => {
queue.delete(msg).await?;
self.set_state(Ready);
}
// We could panic here, and treat this case as a logic error.
// However, we want users of this struct to be able to override the
// default transition, so let them recover if they misuse it.
(state, event) => bail!(
"Invalid transition, state = {state}, event = {event}",
state = state,
event = event,
),
}
Ok(())
}
}
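// End-to-end wiring sketch (illustrative; not part of the original commit):
// drive the poller against an Azure queue via `CallbackImpl`, the way the
// report tasks in this crate do. Any `Processor + Send` implementation will
// do; none is provided here.
#[cfg(test)]
#[allow(unused)]
async fn example_wiring(queue_url: Url, processor: impl Processor + Send) -> Result<()> {
let mut poller: InputPoller<storage_queue::Message> = InputPoller::new("./poller-working-dir");
let callback = CallbackImpl::new(queue_url, processor);
// Loops forever: Ready -> Polled -> Parsed -> Downloaded -> Processed -> Ready.
poller.run(callback).await
}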

View File

@ -0,0 +1,119 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::path::{Path, PathBuf};
use anyhow::Result;
use async_trait::async_trait;
use reqwest::Url;
use storage_queue::{Message, QueueClient};
#[async_trait]
pub trait Queue<M> {
async fn pop(&mut self) -> Result<Option<M>>;
async fn delete(&mut self, msg: M) -> Result<()>;
}
pub trait Parser<M> {
fn parse(&mut self, msg: &M) -> Result<Url>;
}
#[async_trait]
pub trait Downloader {
async fn download(&mut self, url: Url, dir: &Path) -> Result<PathBuf>;
}
#[async_trait]
pub trait Processor {
async fn process(&mut self, url: Url, input: &Path) -> Result<()>;
}
pub trait Callback<M> {
fn queue(&mut self) -> &mut dyn Queue<M>;
fn parser(&mut self) -> &mut dyn Parser<M>;
fn downloader(&mut self) -> &mut dyn Downloader;
fn processor(&mut self) -> &mut dyn Processor;
}
pub struct CallbackImpl<P>
where
P: Processor + Send,
{
queue: QueueClient,
pub processor: P,
}
impl<P> Callback<Message> for CallbackImpl<P>
where
P: Processor + Send,
{
fn queue(&mut self) -> &mut dyn Queue<Message> {
self
}
fn parser(&mut self) -> &mut dyn Parser<Message> {
self
}
fn downloader(&mut self) -> &mut dyn Downloader {
self
}
fn processor(&mut self) -> &mut dyn Processor {
&mut self.processor
}
}
impl<P> CallbackImpl<P>
where
P: Processor + Send,
{
pub fn new(queue_url: Url, processor: P) -> Self {
let queue = QueueClient::new(queue_url);
Self { queue, processor }
}
}
#[async_trait]
impl<P> Queue<Message> for CallbackImpl<P>
where
P: Processor + Send,
{
async fn pop(&mut self) -> Result<Option<Message>> {
self.queue.pop().await
}
async fn delete(&mut self, msg: Message) -> Result<()> {
self.queue.delete(msg).await
}
}
impl<P> Parser<Message> for CallbackImpl<P>
where
P: Processor + Send,
{
fn parse(&mut self, msg: &Message) -> Result<Url> {
let text = std::str::from_utf8(msg.data())?;
let url = Url::parse(text)?;
Ok(url)
}
}
#[async_trait]
impl<P> Downloader for CallbackImpl<P>
where
P: Processor + Send,
{
async fn download(&mut self, url: Url, dir: &Path) -> Result<PathBuf> {
use crate::tasks::utils::download_input;
let input = download_input(url, dir).await?;
Ok(input)
}
}

View File

@ -0,0 +1,243 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use anyhow::Result;
use async_trait::async_trait;
use reqwest::Url;
use std::path::Path;
use tempfile::{tempdir, TempDir};
use super::*;
type Msg = u64;
#[derive(Default)]
struct TestQueue {
pending: Vec<Msg>,
popped: Vec<Msg>,
deleted: Vec<Msg>,
}
#[async_trait]
impl Queue<Msg> for TestQueue {
async fn pop(&mut self) -> Result<Option<Msg>> {
let msg = self.pending.pop();
if let Some(msg) = msg {
self.popped.push(msg);
}
Ok(msg)
}
async fn delete(&mut self, msg: Msg) -> Result<()> {
self.deleted.push(msg);
Ok(())
}
}
pub struct TestQueueAlwaysFails;
#[async_trait]
impl Queue<Msg> for TestQueueAlwaysFails {
async fn pop(&mut self) -> Result<Option<Msg>> {
bail!("simulated `Queue::pop()` failure")
}
async fn delete(&mut self, _msg: Msg) -> Result<()> {
bail!("simulated `Queue::delete()` failure")
}
}
#[derive(Default)]
struct TestParser {
urls: Vec<Url>,
}
impl Parser<Msg> for TestParser {
fn parse(&mut self, msg: &Msg) -> Result<Url> {
// By returning the `Url` at index `msg`, we witness that `parse()` was
// called with `msg`, and simulate a valid input.
let url = self.urls[*msg as usize].clone();
Ok(url)
}
}
#[derive(Default)]
struct TestDownloader {
downloaded: Vec<Url>,
}
#[async_trait]
impl Downloader for TestDownloader {
async fn download(&mut self, url: Url, dir: &Path) -> Result<PathBuf> {
let name = url_input_name(&url);
let dst = dir.join(name);
self.downloaded.push(url);
Ok(dst)
}
}
#[derive(Default)]
struct TestProcessor {
processed: Vec<(Url, PathBuf)>,
}
#[async_trait]
impl Processor for TestProcessor {
async fn process(&mut self, url: Url, input: &Path) -> Result<()> {
self.processed.push((url, input.to_owned()));
Ok(())
}
}
fn url_input_name(url: &Url) -> String {
url.path_segments().unwrap().last().unwrap().to_owned()
}
fn fixture() -> (TempDir, InputPoller<Msg>) {
let dir = tempdir().unwrap();
let task = InputPoller::new(dir.path());
(dir, task)
}
fn url_fixture(msg: Msg) -> Url {
Url::parse(&format!("https://azure.com/c/{}", msg)).unwrap()
}
fn input_fixture(dir: &Path, msg: Msg) -> PathBuf {
let name = msg.to_string();
dir.join(name)
}
#[tokio::test]
async fn test_ready_poll() {
let (_, mut task) = fixture();
let msg: Msg = 0;
let mut queue = TestQueue::default();
queue.pending = vec![msg];
task.trigger(Event::Poll(&mut queue)).await.unwrap();
assert_eq!(task.state(), &State::Polled(Some(msg)));
assert_eq!(queue.popped, vec![msg]);
}
#[tokio::test]
async fn test_polled_some_parse() {
let (_, mut task) = fixture();
let msg: Msg = 0;
let url = url_fixture(msg);
task.set_state(State::Polled(Some(msg)));
let mut parser = TestParser::default();
parser.urls = vec![url.clone()]; // at index `msg`
task.trigger(Event::Parse(&mut parser)).await.unwrap();
assert_eq!(task.state(), &State::Parsed(msg, url));
}
#[tokio::test]
async fn test_polled_none_parse() {
let (_, mut task) = fixture();
task.set_state(State::Polled(None));
let mut parser = TestParser::default();
parser.urls = vec![];
task.trigger(Event::Parse(&mut parser)).await.unwrap();
assert_eq!(task.state(), &State::Ready);
}
#[tokio::test]
async fn test_parsed_download() {
let (dir, mut task) = fixture();
let msg: Msg = 0;
let url = url_fixture(msg);
let input = input_fixture(dir.path(), msg);
task.set_state(State::Parsed(msg, url.clone()));
let mut downloader = TestDownloader::default();
task.trigger(Event::Download(&mut downloader))
.await
.unwrap();
assert_eq!(task.state(), &State::Downloaded(msg, url.clone(), input));
assert_eq!(downloader.downloaded, vec![url]);
}
#[tokio::test]
async fn test_downloaded_process() {
let (dir, mut task) = fixture();
let msg: Msg = 0;
let url = url_fixture(msg);
let input = input_fixture(dir.path(), msg);
task.set_state(State::Downloaded(msg, url.clone(), input.clone()));
let mut processor = TestProcessor::default();
task.trigger(Event::Process(&mut processor)).await.unwrap();
assert_eq!(task.state(), &State::Processed(msg));
assert_eq!(processor.processed, vec![(url, input)]);
}
#[tokio::test]
async fn test_processed_finish() {
let (_, mut task) = fixture();
let msg: Msg = 0;
task.set_state(State::Processed(msg));
let mut queue = TestQueue::default();
task.trigger(Event::Finish(&mut queue)).await.unwrap();
assert_eq!(task.state(), &State::Ready);
assert_eq!(queue.deleted, vec![msg]);
}
#[tokio::test]
async fn test_invalid_trigger() {
let (_, mut task) = fixture();
let mut queue = TestQueue::default();
// Invalid transition: `(Ready, Finish)`.
let result = task.trigger(Event::Finish(&mut queue)).await;
assert!(result.is_err());
assert_eq!(task.state(), &State::Ready);
}
#[tokio::test]
async fn test_valid_trigger_failed_action() {
let (_, mut task) = fixture();
let mut queue = TestQueueAlwaysFails;
// Valid transition, but `queue.pop()` will return `Err`.
let result = task.trigger(Event::Poll(&mut queue)).await;
assert!(result.is_err());
assert_eq!(task.state(), &State::Ready);
}

View File

@ -0,0 +1,4 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod input_poller;

View File

@ -0,0 +1,164 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::onefuzz::machine_id::{get_machine_id, get_machine_name};
use crate::tasks::utils::CheckNotify;
use anyhow::Result;
use reqwest::Url;
use serde::{self, Deserialize, Serialize};
use std::{
collections::HashSet,
sync::{Arc, Mutex},
time::Duration,
};
use storage_queue::QueueClient;
use tokio::{
sync::Notify,
task::{self, JoinHandle},
};
use uuid::Uuid;
const DEFAULT_HEARTBEAT_PERIOD: Duration = Duration::from_secs(60 * 5);
#[derive(Debug, Deserialize, Serialize, Hash, Eq, PartialEq, Clone)]
#[serde(tag = "type")]
pub enum HeartbeatData {
TaskAlive,
MachineAlive,
}
#[derive(Debug, Deserialize, Serialize, Clone)]
struct Heartbeat<'a> {
task_id: Uuid,
machine_id: Uuid,
machine_name: &'a str,
data: Vec<HeartbeatData>,
}
pub struct HeartbeatClient {
cancelled: Arc<Notify>,
messages: Arc<Mutex<HashSet<HeartbeatData>>>,
_heartbeat_process: JoinHandle<Result<()>>,
}
impl Drop for HeartbeatClient {
fn drop(&mut self) {
self.cancelled.notify();
}
}
impl HeartbeatClient {
pub fn init(queue_url: Url, task_id: Uuid) -> Self {
HeartbeatClient::init_with_period(queue_url, task_id, DEFAULT_HEARTBEAT_PERIOD)
}
pub fn init_with_period(queue_url: Url, task_id: Uuid, heartbeat_period: Duration) -> Self {
let messages = Arc::new(Mutex::new(HashSet::new()));
let cancelled = Arc::new(Notify::new());
let _heartbeat_process = HeartbeatClient::start_background_process(
task_id,
queue_url,
messages.clone(),
cancelled.clone(),
heartbeat_period,
);
HeartbeatClient {
messages,
_heartbeat_process,
cancelled,
}
}
fn drain_current_messages(messages: Arc<Mutex<HashSet<HeartbeatData>>>) -> Vec<HeartbeatData> {
let lock = messages.lock();
let mut messages = lock.unwrap();
let drain = messages.iter().cloned().collect::<Vec<HeartbeatData>>();
messages.clear();
drain
}
async fn flush(
task_id: Uuid,
machine_id: Uuid,
machine_name: &str,
queue_client: &QueueClient,
messages: Arc<Mutex<HashSet<HeartbeatData>>>,
) {
let mut data = HeartbeatClient::drain_current_messages(messages.clone());
data.push(HeartbeatData::MachineAlive);
let _ = queue_client
.enqueue(Heartbeat {
task_id,
data,
machine_id,
machine_name,
})
.await;
}
pub fn start_background_process(
task_id: Uuid,
queue_url: Url,
messages: Arc<Mutex<HashSet<HeartbeatData>>>,
cancelled: Arc<Notify>,
heartbeat_period: Duration,
) -> JoinHandle<Result<()>> {
let queue_client = QueueClient::new(queue_url);
task::spawn(async move {
let machine_id = get_machine_id().await?;
let machine_name = get_machine_name().await?;
HeartbeatClient::flush(
task_id,
machine_id,
&machine_name,
&queue_client,
messages.clone(),
)
.await;
while !cancelled.is_notified(heartbeat_period).await {
HeartbeatClient::flush(
task_id,
machine_id,
&machine_name,
&queue_client,
messages.clone(),
)
.await;
}
HeartbeatClient::flush(
task_id,
machine_id,
&machine_name,
&queue_client,
messages.clone(),
)
.await;
Ok(())
})
}
}
pub trait HeartbeatSender {
fn send(&self, data: HeartbeatData) -> Result<()>;
fn alive(&self) {
self.send(HeartbeatData::TaskAlive).unwrap()
}
}
impl HeartbeatSender for HeartbeatClient {
fn send(&self, data: HeartbeatData) -> Result<()> {
let mut messages_lock = self.messages.lock().unwrap();
messages_lock.insert(data);
Ok(())
}
}
impl HeartbeatSender for Option<HeartbeatClient> {
fn send(&self, data: HeartbeatData) -> Result<()> {
match self {
Some(client) => client.send(data),
None => Ok(()),
}
}
}
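// Usage sketch (illustrative; not part of the original commit): a task
// constructs one client, calls `alive()` from its main loop, and relies on
// `Drop` to stop the background flush task.
#[cfg(test)]
#[allow(unused)]
fn example_heartbeat(queue_url: Url, task_id: Uuid) {
let client = HeartbeatClient::init_with_period(queue_url, task_id, Duration::from_secs(60));
// Queues a TaskAlive message; the background task flushes it (plus an
// implicit MachineAlive) at most once per period.
client.alive();
// Dropping the client notifies the background loop, which flushes once
// more and then exits.
drop(client);
}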

View File

@ -0,0 +1,175 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
heartbeat::HeartbeatSender,
utils,
};
use anyhow::Result;
use onefuzz::{expand::Expand, fs::set_executable};
use reqwest::Url;
use serde::Deserialize;
use std::{
collections::HashMap,
path::{Path, PathBuf},
process::Stdio,
sync::Arc,
};
use storage_queue::{QueueClient, EMPTY_QUEUE_DELAY};
use tokio::process::Command;
#[derive(Debug, Deserialize)]
struct QueueMessage {
content_length: u32,
url: Url,
}
#[derive(Debug, Deserialize)]
pub struct Config {
pub supervisor_exe: String,
pub supervisor_options: Vec<String>,
pub supervisor_env: HashMap<String, String>,
pub supervisor_input_marker: String,
pub target_exe: PathBuf,
pub target_options: Vec<String>,
pub target_options_merge: bool,
pub tools: SyncedDir,
pub input_queue: Url,
pub inputs: SyncedDir,
pub unique_inputs: SyncedDir,
#[serde(flatten)]
pub common: CommonConfig,
}
pub async fn spawn(config: Arc<Config>) -> Result<()> {
utils::init_dir(&config.tools.path).await?;
utils::sync_remote_dir(&config.tools, utils::SyncOperation::Pull).await?;
set_executable(&config.tools.path).await?;
utils::init_dir(&config.unique_inputs.path).await?;
let hb_client = config.common.init_heartbeat();
loop {
hb_client.alive();
let tmp_dir = PathBuf::from("./tmp");
verbose!("tmp dir reset");
utils::reset_tmp_dir(&tmp_dir).await?;
utils::sync_remote_dir(&config.unique_inputs, utils::SyncOperation::Pull).await?;
let mut queue = QueueClient::new(config.input_queue.clone());
if let Some(msg) = queue.pop().await? {
let input_url = match utils::parse_url_data(msg.data()) {
Ok(url) => url,
Err(err) => {
error!("could not parse input URL from queue message: {}", err);
return Ok(());
}
};
if let Err(error) = process_message(config.clone(), &input_url, &tmp_dir).await {
error!(
"failed to process latest message from notification queue: {}",
error
);
} else {
verbose!("will delete popped message with id = {}", msg.id());
queue.delete(msg).await?;
verbose!(
"Attempting to delete {} from the candidate container",
input_url.clone()
);
if let Err(e) = try_delete_blob(input_url.clone()).await {
error!("Failed to delete blob {}", e)
}
}
} else {
warn!("no new candidate inputs found, sleeping");
tokio::time::delay_for(EMPTY_QUEUE_DELAY).await;
}
}
}
async fn process_message(config: Arc<Config>, input_url: &Url, tmp_dir: &PathBuf) -> Result<()> {
let input_path = utils::download_input(input_url.clone(), &config.unique_inputs.path).await?;
info!("downloaded input to {}", input_path.display());
info!("Merging corpus");
match merge(&config, tmp_dir).await {
Ok(_) => {
// remove the 'queue' folder
let mut queue_dir = tmp_dir.clone();
queue_dir.push("queue");
let _delete_output = tokio::fs::remove_dir_all(queue_dir).await;
let synced_dir = SyncedDir {
path: tmp_dir.clone(),
url: config.unique_inputs.url.clone(),
};
utils::sync_remote_dir(&synced_dir, utils::SyncOperation::Push).await?;
}
Err(e) => error!("Merge failed: {}", e),
}
Ok(())
}
async fn try_delete_blob(input_url: Url) -> Result<()> {
let http_client = reqwest::Client::new();
match http_client
.delete(input_url)
.send()
.await?
.error_for_status()
{
Ok(_) => Ok(()),
Err(err) => Err(err.into()),
}
}
async fn merge(config: &Config, output_dir: impl AsRef<Path>) -> Result<()> {
let mut supervisor_args = Expand::new();
supervisor_args
.input(&config.supervisor_input_marker)
.input_corpus(&config.unique_inputs.path)
.target_options(&config.target_options)
.supervisor_exe(&config.supervisor_exe)
.supervisor_options(&config.supervisor_options)
.generated_inputs(output_dir)
.target_exe(&config.target_exe);
if config.target_options_merge {
supervisor_args.target_options(&config.target_options);
}
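// With `target_options_merge` set, the target options are consumed only via
// `{target_options}` expansion inside `supervisor_options`; otherwise they
// are also appended directly to the command line below.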
let supervisor_path = Expand::new()
.tools_dir(&config.tools.path)
.evaluate_value(&config.supervisor_exe)?;
let mut cmd = Command::new(supervisor_path);
cmd.kill_on_drop(true)
.env_remove("RUST_LOG")
.stdout(Stdio::piped())
.stderr(Stdio::piped());
for (k, v) in &config.supervisor_env {
cmd.env(k, v);
}
for arg in supervisor_args.evaluate(&config.supervisor_options)? {
cmd.arg(arg);
}
if !config.target_options_merge {
for arg in supervisor_args.evaluate(&config.target_options)? {
cmd.arg(arg);
}
}
info!("Starting merge '{:?}'", cmd);
cmd.spawn()?.wait_with_output().await?;
Ok(())
}

View File

@ -0,0 +1,137 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::{
config::{CommonConfig, SyncedDir},
heartbeat::*,
utils,
};
use anyhow::Result;
use onefuzz::libfuzzer::{LibFuzzer, LibFuzzerMergeOutput};
use reqwest::Url;
use serde::Deserialize;
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use storage_queue::{QueueClient, EMPTY_QUEUE_DELAY};
#[derive(Debug, Deserialize)]
struct QueueMessage {
content_length: u32,
url: Url,
}
#[derive(Debug, Deserialize)]
pub struct Config {
pub target_exe: PathBuf,
pub target_env: HashMap<String, String>,
pub target_options: Vec<String>,
pub input_queue: Url,
pub inputs: SyncedDir,
pub unique_inputs: SyncedDir,
#[serde(flatten)]
pub common: CommonConfig,
}
pub async fn spawn(config: Arc<Config>) -> Result<()> {
let hb_client = config.common.init_heartbeat();
utils::init_dir(&config.unique_inputs.path).await?;
loop {
hb_client.alive();
if let Err(error) = process_message(config.clone()).await {
error!(
"failed to process latest message from notification queue: {}",
error
);
}
}
}
async fn process_message(config: Arc<Config>) -> Result<()> {
let tmp_dir = "./tmp";
verbose!("tmp dir reset");
utils::reset_tmp_dir(tmp_dir).await?;
utils::sync_remote_dir(&config.unique_inputs, utils::SyncOperation::Pull).await?;
let mut queue = QueueClient::new(config.input_queue.clone());
if let Some(msg) = queue.pop().await? {
let input_url = match utils::parse_url_data(msg.data()) {
Ok(url) => url,
Err(err) => {
error!("could not parse input URL from queue message: {}", err);
return Ok(());
}
};
let input_path = utils::download_input(input_url.clone(), tmp_dir).await?;
info!("downloaded input to {}", input_path.display());
info!("Merging corpus");
match merge(
&config.target_exe,
&config.target_options,
&config.target_env,
&config.unique_inputs.path,
&tmp_dir,
)
.await
{
Ok(result) if result.added_files_count > 0 => {
info!("Added {} new files to the corpus", result.added_files_count);
utils::sync_remote_dir(&config.unique_inputs, utils::SyncOperation::Push).await?;
}
Ok(_) => info!("No new files added by the merge"),
Err(e) => error!("Merge failed: {}", e),
}
verbose!("will delete popped message with id = {}", msg.id());
queue.delete(msg).await?;
verbose!(
"Attempting to delete {} from the candidate container",
input_url.clone()
);
if let Err(e) = try_delete_blob(input_url.clone()).await {
error!("Failed to delete blob {}", e)
}
Ok(())
} else {
warn!("no new candidate inputs found, sleeping");
tokio::time::delay_for(EMPTY_QUEUE_DELAY).await;
Ok(())
}
}
async fn try_delete_blob(input_url: Url) -> Result<()> {
let http_client = reqwest::Client::new();
match http_client
.delete(input_url)
.send()
.await?
.error_for_status()
{
Ok(_) => Ok(()),
Err(err) => Err(err.into()),
}
}
async fn merge(
target_exe: &Path,
target_options: &[String],
target_env: &HashMap<String, String>,
corpus_dir: &Path,
candidate_dir: impl AsRef<Path>,
) -> Result<LibFuzzerMergeOutput> {
let merger = LibFuzzer::new(target_exe, target_options, target_env);
let candidates = vec![candidate_dir];
merger.merge(&corpus_dir, &candidates).await
}

View File

@ -0,0 +1,5 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod generic;
pub mod libfuzzer_merge;

View File

@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod analysis;
pub mod config;
pub mod coverage;
pub mod fuzz;
pub mod generic;
pub mod heartbeat;
pub mod merge;
pub mod report;
pub mod stats;
pub mod utils;

View File

@ -0,0 +1,162 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use crate::tasks::config::SyncedDir;
use anyhow::Result;
use onefuzz::{
asan::AsanLog,
blob::{BlobClient, BlobContainerUrl, BlobUrl},
};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use uuid::Uuid;
#[derive(Debug, Deserialize, Serialize)]
pub struct CrashReport {
pub input_sha256: String,
pub input_blob: InputBlob,
pub executable: PathBuf,
pub crash_type: String,
pub crash_site: String,
pub call_stack: Vec<String>,
pub call_stack_sha256: String,
pub asan_log: Option<String>,
pub task_id: Uuid,
pub job_id: Uuid,
}
#[derive(Debug, Deserialize, Serialize)]
pub struct NoCrash {
pub input_sha256: String,
pub input_blob: InputBlob,
pub executable: PathBuf,
pub task_id: Uuid,
pub job_id: Uuid,
pub tries: u64,
pub error: Option<String>,
}
#[derive(Debug, Deserialize, Serialize)]
pub enum CrashTestResult {
CrashReport(CrashReport),
NoRepro(NoCrash),
}
// Conditionally upload a report, if it would not be a duplicate.
//
// Use SHA-256 of call stack as dedupe key.
async fn upload_deduped(report: &CrashReport, container: &BlobContainerUrl) -> Result<()> {
let blob = BlobClient::new();
let deduped_name = report.unique_blob_name();
let deduped_url = container.blob(deduped_name).url();
blob.put(deduped_url)
.json(report)
// Conditional PUT, only if-not-exists.
.header("If-None-Match", "*")
.send()
.await?;
Ok(())
}
async fn upload_report(report: &CrashReport, container: &BlobContainerUrl) -> Result<()> {
let blob = BlobClient::new();
let url = container.blob(report.blob_name()).url();
blob.put(url).json(report).send().await?;
Ok(())
}
async fn upload_no_repro(report: &NoCrash, container: &BlobContainerUrl) -> Result<()> {
let blob = BlobClient::new();
let url = container.blob(report.blob_name()).url();
blob.put(url).json(report).send().await?;
Ok(())
}
impl CrashTestResult {
pub async fn upload(
&self,
unique_reports: &SyncedDir,
reports: &Option<SyncedDir>,
no_repro: &Option<SyncedDir>,
) -> Result<()> {
match self {
Self::CrashReport(report) => {
upload_deduped(report, &unique_reports.url).await?;
if let Some(reports) = reports {
upload_report(report, &reports.url).await?;
}
}
Self::NoRepro(report) => {
if let Some(no_repro) = no_repro {
upload_no_repro(report, &no_repro.url).await?;
}
}
}
Ok(())
}
}
#[derive(Debug, Deserialize, Serialize)]
pub struct InputBlob {
pub account: String,
pub container: String,
pub name: String,
}
impl From<BlobUrl> for InputBlob {
fn from(blob: BlobUrl) -> Self {
Self {
account: blob.account(),
container: blob.container(),
name: blob.name(),
}
}
}
impl CrashReport {
pub fn new(
asan_log: AsanLog,
task_id: Uuid,
job_id: Uuid,
executable: impl Into<PathBuf>,
input_blob: InputBlob,
input_sha256: String,
) -> Self {
Self {
input_sha256,
input_blob,
executable: executable.into(),
crash_type: asan_log.fault_type().into(),
crash_site: asan_log.summary().into(),
call_stack: asan_log.call_stack().to_vec(),
call_stack_sha256: asan_log.call_stack_sha256(),
asan_log: Some(asan_log.text().to_string()),
task_id,
job_id,
}
}
pub fn blob_name(&self) -> String {
format!("{}.json", self.input_sha256)
}
pub fn unique_blob_name(&self) -> String {
format!("{}.json", self.call_stack_sha256)
}
}
impl NoCrash {
pub fn blob_name(&self) -> String {
format!("{}.json", self.input_sha256)
}
}
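// Naming sketch: a report is stored as `<input_sha256>.json`, while its
// deduplicated copy is stored as `<call_stack_sha256>.json`, so distinct
// inputs that crash with the same call stack collapse into a single blob in
// the `unique_reports` container.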

View File

@ -0,0 +1,171 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use super::crash_report::{CrashReport, CrashTestResult, InputBlob, NoCrash};
use crate::tasks::{
config::{CommonConfig, SyncedDir},
generic::input_poller::{CallbackImpl, InputPoller, Processor},
heartbeat::*,
};
use anyhow::Result;
use async_trait::async_trait;
use onefuzz::{blob::BlobUrl, input_tester::Tester, sha256};
use reqwest::Url;
use serde::Deserialize;
use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use storage_queue::Message;
fn default_bool_true() -> bool {
true
}
#[derive(Debug, Deserialize)]
pub struct Config {
pub target_exe: PathBuf,
#[serde(default)]
pub target_options: Vec<String>,
#[serde(default)]
pub target_env: HashMap<String, String>,
pub input_queue: Option<Url>,
pub crashes: Option<SyncedDir>,
pub reports: Option<SyncedDir>,
pub unique_reports: SyncedDir,
pub no_repro: Option<SyncedDir>,
pub target_timeout: Option<u64>,
#[serde(default)]
pub check_asan_log: bool,
#[serde(default = "default_bool_true")]
pub check_debugger: bool,
#[serde(default)]
pub check_retry_count: u64,
#[serde(flatten)]
pub common: CommonConfig,
}
pub struct ReportTask<'a> {
config: &'a Config,
poller: InputPoller<Message>,
}
impl<'a> ReportTask<'a> {
pub fn new(config: &'a Config) -> Self {
let working_dir = config.common.task_id.to_string();
let poller = InputPoller::new(working_dir);
Self { config, poller }
}
pub async fn run(&mut self) -> Result<()> {
info!("Starting generic crash report task");
let mut processor = GenericReportProcessor::new(&self.config);
if let Some(crashes) = &self.config.crashes {
self.poller.batch_process(&mut processor, &crashes).await?;
}
if let Some(queue) = &self.config.input_queue {
let callback = CallbackImpl::new(queue.clone(), processor);
self.poller.run(callback).await?;
}
Ok(())
}
}
pub struct GenericReportProcessor<'a> {
config: &'a Config,
tester: Tester<'a>,
heartbeat_client: Option<HeartbeatClient>,
}
impl<'a> GenericReportProcessor<'a> {
pub fn new(config: &'a Config) -> Self {
let heartbeat_client = config.common.init_heartbeat();
let tester = Tester::new(
&config.target_exe,
&config.target_options,
&config.target_env,
&config.target_timeout,
config.check_asan_log,
config.check_debugger,
config.check_retry_count,
);
Self {
config,
tester,
heartbeat_client,
}
}
pub async fn test_input(&self, input_url: Url, input: &Path) -> Result<CrashTestResult> {
self.heartbeat_client.alive();
let input_sha256 = sha256::digest_file(input).await?;
let task_id = self.config.common.task_id;
let job_id = self.config.common.job_id;
let input_blob = InputBlob::from(BlobUrl::new(input_url)?);
let test_report = self.tester.test_input(input).await?;
if let Some(asan_log) = test_report.asan_log {
let crash_report = CrashReport::new(
asan_log,
task_id,
job_id,
&self.config.target_exe,
input_blob,
input_sha256,
);
Ok(CrashTestResult::CrashReport(crash_report))
} else if let Some(crash) = test_report.crash {
let call_stack_sha256 = sha256::digest_iter(&crash.call_stack);
let crash_report = CrashReport {
input_blob,
input_sha256,
executable: PathBuf::from(&self.config.target_exe),
call_stack: crash.call_stack,
crash_type: crash.crash_type,
crash_site: crash.crash_site,
call_stack_sha256,
asan_log: None,
task_id,
job_id,
};
Ok(CrashTestResult::CrashReport(crash_report))
} else {
let no_repro = NoCrash {
input_blob,
input_sha256,
executable: PathBuf::from(&self.config.target_exe),
task_id,
job_id,
tries: 1 + self.config.check_retry_count,
error: test_report.error.map(|e| format!("{}", e)),
};
Ok(CrashTestResult::NoRepro(no_repro))
}
}
}
#[async_trait]
impl<'a> Processor for GenericReportProcessor<'a> {
async fn process(&mut self, url: Url, input: &Path) -> Result<()> {
let report = self.test_input(url, input).await?;
report
.upload(
&self.config.unique_reports,
&self.config.reports,
&self.config.no_repro,
)
.await
}
}

View File

@ -0,0 +1,149 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use super::crash_report::*;
use crate::tasks::{
config::{CommonConfig, SyncedDir},
generic::input_poller::*,
heartbeat::*,
};
use anyhow::Result;
use async_trait::async_trait;
use onefuzz::{blob::BlobUrl, libfuzzer::LibFuzzer, sha256};
use reqwest::Url;
use serde::Deserialize;
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use storage_queue::Message;
#[derive(Debug, Deserialize)]
pub struct Config {
pub target_exe: PathBuf,
pub target_env: HashMap<String, String>,
// TODO: options are not yet used for crash reporting
pub target_options: Vec<String>,
pub target_timeout: Option<u64>,
pub input_queue: Option<Url>,
pub crashes: Option<SyncedDir>,
pub reports: Option<SyncedDir>,
pub unique_reports: SyncedDir,
pub no_repro: Option<SyncedDir>,
#[serde(default)]
pub check_retry_count: u64,
#[serde(flatten)]
pub common: CommonConfig,
}
pub struct ReportTask {
config: Arc<Config>,
pub poller: InputPoller<Message>,
}
impl ReportTask {
pub fn new(config: impl Into<Arc<Config>>) -> Self {
let config = config.into();
let working_dir = config.common.task_id.to_string();
let poller = InputPoller::new(working_dir);
Self { config, poller }
}
pub async fn run(&mut self) -> Result<()> {
info!("Starting libFuzzer crash report task");
let mut processor = AsanProcessor::new(self.config.clone());
if let Some(crashes) = &self.config.crashes {
self.poller.batch_process(&mut processor, crashes).await?;
}
if let Some(queue) = &self.config.input_queue {
let callback = CallbackImpl::new(queue.clone(), processor);
self.poller.run(callback).await?;
}
Ok(())
}
}
pub struct AsanProcessor {
config: Arc<Config>,
heartbeat_client: Option<HeartbeatClient>,
}
impl AsanProcessor {
pub fn new(config: Arc<Config>) -> Self {
let heartbeat_client = config.common.init_heartbeat();
Self {
config,
heartbeat_client,
}
}
pub async fn test_input(&self, input_url: Url, input: &Path) -> Result<CrashTestResult> {
self.heartbeat_client.alive();
let fuzzer = LibFuzzer::new(
&self.config.target_exe,
&self.config.target_options,
&self.config.target_env,
);
let task_id = self.config.common.task_id;
let job_id = self.config.common.job_id;
let input_blob = InputBlob::from(BlobUrl::new(input_url)?);
let input_sha256 = sha256::digest_file(input).await?;
let test_report = fuzzer
.repro(
input,
self.config.target_timeout,
self.config.check_retry_count,
)
.await?;
match test_report.asan_log {
Some(asan_log) => {
let crash_report = CrashReport::new(
asan_log,
task_id,
job_id,
&self.config.target_exe,
input_blob,
input_sha256,
);
Ok(CrashTestResult::CrashReport(crash_report))
}
None => {
let no_repro = NoCrash {
input_blob,
input_sha256,
executable: PathBuf::from(&self.config.target_exe),
task_id,
job_id,
tries: 1 + self.config.check_retry_count,
error: test_report.error.map(|e| format!("{}", e)),
};
Ok(CrashTestResult::NoRepro(no_repro))
}
}
}
}
#[async_trait]
impl Processor for AsanProcessor {
async fn process(&mut self, url: Url, input: &Path) -> Result<()> {
let report = self.test_input(url, input).await?;
report
.upload(
&self.config.unique_reports,
&self.config.reports,
&self.config.no_repro,
)
.await
}
}

View File

@ -0,0 +1,6 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod crash_report;
pub mod generic;
pub mod libfuzzer_report;

View File

@ -0,0 +1,109 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use anyhow::{Error, Result};
use onefuzz::telemetry::EventData;
use std::path::Path;
use tokio::io::AsyncBufReadExt;
pub async fn read_stats(output_path: impl AsRef<Path>) -> Result<Vec<EventData>, Error> {
let f = tokio::fs::File::open(output_path).await?;
let mut stats = Vec::new();
let reader = tokio::io::BufReader::new(f);
let mut lines = reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
let mut name_value = line.splitn(2, ':');
let name = name_value.next().unwrap().trim();
let value = match name_value.next() {
Some(value) => value.trim(),
// skip lines that are not `name : value` pairs
None => continue,
};
match name {
"target_mode" => {
stats.push(EventData::Mode(value.to_string()));
}
"paths_total" => {
if let Ok(value) = u64::from_str_radix(&value, 10) {
stats.push(EventData::CoveragePaths(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"fuzzer_pid" => {
if let Ok(value) = u32::from_str_radix(&value, 10) {
stats.push(EventData::Pid(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"execs_done" => {
if let Ok(value) = u64::from_str_radix(&value, 10) {
stats.push(EventData::Count(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"paths_favored" => {
if let Ok(value) = u64::from_str_radix(&value, 10) {
stats.push(EventData::CoveragePathsFavored(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"paths_found" => {
if let Ok(value) = u64::from_str_radix(&value, 10) {
stats.push(EventData::CoveragePathsFound(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"paths_imported" => {
if let Ok(value) = u64::from_str_radix(&value, 10) {
stats.push(EventData::CoveragePathsImported(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"execs_per_sec" => {
if let Ok(value) = value.parse::<f64>() {
stats.push(EventData::ExecsSecond(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"bitmap_cvg" => {
let value = value.replace("%", "");
if let Ok(value) = value.parse::<f64>() {
stats.push(EventData::Coverage(value));
} else {
error!("unable to parse telemetry: {:?} {:?}", name, value);
}
}
"command_line" => {
stats.push(EventData::CommandLine(value.to_string()));
}
// ignored telemetry
"cycles_done" | "afl_banner" | "afl_version" | "start_time" | "last_update"
| "stability" | "unique_crashes" | "unique_hangs" | "pending_favs"
| "pending_total" | "variable_paths" | "last_path" | "last_crash" | "last_hang"
| "execs_since_crash" | "max_depth" | "cur_path" | "exec_timeout" => {}
_ => {
warn!("unsupported telemetry: {} {}", name, value);
}
}
}
Ok(stats)
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_stats_parse() {
let results = read_stats("data/afl-fuzzer_stats.txt").await.unwrap();
assert!(results.len() > 5);
assert!(results.contains(&EventData::Pid(26515)));
assert!(results.contains(&EventData::ExecsSecond(2666.67)));
assert!(results.contains(&EventData::Mode("default".to_string())));
}
}

View File

@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use super::afl;
use anyhow::{Error, Result};
use onefuzz::telemetry::{track_event, Event::runtime_stats};
use serde::Deserialize;
pub const STATS_DELAY: std::time::Duration = std::time::Duration::from_secs(30);
#[derive(Debug, Deserialize, Clone)]
pub enum StatsFormat {
AFL,
}
pub async fn monitor_stats(path: Option<String>, format: Option<StatsFormat>) -> Result<(), Error> {
if let Some(path) = path {
if let Some(format) = format {
loop {
let stats = match format {
StatsFormat::AFL => afl::read_stats(&path).await,
};
if let Ok(stats) = stats {
track_event(runtime_stats, stats);
}
tokio::time::delay_for(STATS_DELAY).await;
}
}
}
Ok(())
}

View File

@ -0,0 +1,5 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
pub mod afl;
pub mod common;

View File

@ -0,0 +1,182 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
use std::path::{Path, PathBuf};
use std::time::Duration;
use anyhow::Result;
use async_trait::async_trait;
use futures::{future::Future, stream::StreamExt};
use onefuzz::{
az_copy,
monitor::DirectoryMonitor,
telemetry::{Event::new_result, EventData},
};
use reqwest::Url;
use tokio::{fs, io};
use crate::tasks::config::SyncedDir;
#[derive(Debug)]
pub enum SyncOperation {
Push,
Pull,
}
pub async fn download_input(input_url: Url, dst: impl AsRef<Path>) -> Result<PathBuf> {
let file_name = input_url.path_segments().unwrap().last().unwrap();
let file_path = dst.as_ref().join(file_name);
let resp = reqwest::get(input_url).await?;
let body = resp.bytes().await?;
let mut body = body.as_ref();
let file = fs::OpenOptions::new()
.create(true)
.write(true)
.open(&file_path)
.await?;
let mut writer = io::BufWriter::new(file);
io::copy(&mut body, &mut writer).await?;
Ok(file_path)
}
pub async fn reset_tmp_dir(tmp_dir: impl AsRef<Path>) -> Result<()> {
let tmp_dir = tmp_dir.as_ref();
let dir_exists = fs::metadata(tmp_dir).await.is_ok();
if dir_exists {
fs::remove_dir_all(tmp_dir).await?;
verbose!("deleted {}", tmp_dir.display());
}
fs::create_dir_all(tmp_dir).await?;
verbose!("created {}", tmp_dir.display());
Ok(())
}
pub async fn sync_remote_dir(sync_dir: &SyncedDir, sync_operation: SyncOperation) -> Result<()> {
let dir = &sync_dir.path;
let url = sync_dir.url.url();
let url = url.as_ref();
info!("syncing {:?} {:?}", sync_operation, sync_dir.path);
match sync_operation {
SyncOperation::Push => az_copy::sync(dir, url).await,
SyncOperation::Pull => az_copy::sync(url, dir).await,
}
}
pub async fn init_dir(path: impl AsRef<Path>) -> Result<()> {
let path = path.as_ref();
match fs::metadata(path).await {
Ok(m) => {
if m.is_dir() {
Ok(())
} else {
anyhow::bail!("File with name '{}' already exists", path.display());
}
}
Err(_) => fs::create_dir(path).await.map_err(|e| e.into()),
}
}
pub fn parse_url_data(data: &[u8]) -> Result<Url> {
let text = std::str::from_utf8(data)?;
let url = Url::parse(text)?;
Ok(url)
}
#[async_trait]
pub trait CheckNotify {
async fn is_notified(&self, delay: Duration) -> bool;
}
#[async_trait]
impl CheckNotify for tokio::sync::Notify {
async fn is_notified(&self, delay: Duration) -> bool {
let notify = self;
tokio::select! {
() = tokio::time::delay_for(delay) => false,
() = notify.notified() => true,
}
}
}
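// Usage sketch (illustrative; this mirrors the heartbeat loop in
// `tasks::heartbeat`): do periodic work until another task calls `notify()`.
//
// while !cancelled.is_notified(period).await {
//     /* periodic work */
// }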
const DELAY: Duration = Duration::from_secs(10);
pub fn file_uploader_monitor(synced_dir: SyncedDir) -> Result<impl Future> {
verbose!("monitoring {}", synced_dir.path.display());
let dir = synced_dir.path;
let url = synced_dir.url;
let mut monitor = DirectoryMonitor::new(&dir);
monitor.start()?;
let monitor = monitor.for_each(move |item| {
verbose!("saw item = {}", item.display());
let url = url.clone();
async move {
event!(new_result; EventData::Path = item.display().to_string());
let mut uploader = onefuzz::uploader::BlobUploader::new(url.url());
let result = uploader.upload(item.clone()).await;
if let Err(err) = result {
error!("couldn't upload item = {}, error = {}", item.display(), err);
} else {
verbose!("uploaded item = {}", item.display());
}
}
});
verbose!("done monitoring {}", dir.display());
Ok(monitor)
}
/// Monitor a directory for results.
///
/// This function does not require the directory to exist before it is called.
/// If the directory is reset (unlinked and recreated), this function will stop
/// listening to the original filesystem node, and begin watching the new one
/// once it has been created.
///
/// The intent of this is to support use cases where we usually want a directory
/// to be initialized, but a user-supplied binary (such as AFL) logically owns
/// a directory, and may reset it.
pub async fn monitor_result_dir(synced_dir: SyncedDir) -> Result<()> {
loop {
verbose!("waiting to monitor {}", synced_dir.path.display());
while fs::metadata(&synced_dir.path).await.is_err() {
verbose!(
"dir {} not ready to monitor, delaying",
synced_dir.path.display()
);
tokio::time::delay_for(DELAY).await;
}
verbose!("starting monitor for {}", synced_dir.path.display());
file_uploader_monitor(synced_dir.clone())?.await;
}
}
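/// Split a `KEY=value` argument at the first `=`.
///
/// For example (illustrative), `parse_key_value("RUST_LOG=info".to_string())`
/// yields `("RUST_LOG", "info")`; input without an `=` is an error.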
pub fn parse_key_value(value: String) -> Result<(String, String)> {
let offset = value
.find('=')
.ok_or_else(|| format_err!("invalid key=value, no = found {:?}", value))?;
Ok((value[..offset].to_string(), value[offset + 1..].to_string()))
}