diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 35cd676a9..a2216c191 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -39,6 +39,12 @@ version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1" +[[package]] +name = "anymap" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33954243bd79057c2de7338850b85983a44588021f8a5fee574a8888c6de4344" + [[package]] name = "appinsights" version = "0.1.5" @@ -215,15 +221,16 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "backoff" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721c249ab59cbc483ad4294c9ee2671835c1e43e9ffc277e6b4ecfef733cfdc5" +checksum = "9fe17f59a06fe8b87a6fc8bf53bb70b3aba76d7685f432487a68cd5552853625" dependencies = [ "async-std", "futures-core", + "getrandom 0.2.2", "instant", - "pin-project 0.4.27", - "rand 0.7.3", + "pin-project", + "rand 0.8.3", ] [[package]] @@ -541,6 +548,17 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.9", + "syn 1.0.63", +] + [[package]] name = "digest" version = "0.9.0" @@ -745,7 +763,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ab7d1bd1bd33cc98b0889831b72da23c0aa4df9cec7e0702f46ecea04b35db6" dependencies = [ "bitflags", - "fsevent-sys", + "fsevent-sys 2.0.1", +] + +[[package]] +name = "fsevent" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97f347202c95c98805c216f9e1df210e8ebaec9fdb2365700a43c10797a35e63" +dependencies = [ + "bitflags", + "fsevent-sys 3.0.2", ] [[package]] @@ -757,6 +785,15 @@ dependencies = [ "libc", ] +[[package]] +name = "fsevent-sys" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a29c77f1ca394c3e73a9a5d24cfcabb734682d9634fc398f2204a63c994120" +dependencies = [ + "libc", +] + [[package]] name = "fuchsia-zircon" version = "0.3.3" @@ -1064,7 +1101,7 @@ dependencies = [ "httparse", "httpdate", "itoa", - "pin-project 1.0.5", + "pin-project", "socket2", "tokio", "tower-service", @@ -1128,6 +1165,17 @@ dependencies = [ "libc", ] +[[package]] +name = "inotify" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d19f57db1baad9d09e43a3cd76dcf82ebdafd37d75c9498b87762dba77c93f15" +dependencies = [ + "bitflags", + "inotify-sys", + "libc", +] + [[package]] name = "inotify-sys" version = "0.1.5" @@ -1347,6 +1395,19 @@ dependencies = [ "winapi 0.2.8", ] +[[package]] +name = "mio" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5dede4e2065b3842b8b0af444119f3aa331cc7cc2dd20388bfb0f5d5a38823a" +dependencies = [ + "libc", + "log", + "miow 0.3.6", + "ntapi", + "winapi 0.3.9", +] + [[package]] name = "mio-extras" version = "2.0.6" @@ -1355,7 +1416,7 @@ checksum = "52403fe290012ce777c4626790c8951324a2b9e3316b3143779c72b029742f19" dependencies = [ "lazycell", "log", - "mio", + "mio 0.6.23", "slab", ] @@ -1366,7 +1427,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656" dependencies = [ "log", - "mio", + "mio 0.6.23", "miow 0.3.6", "winapi 0.3.9", ] @@ -1379,7 +1440,7 @@ checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" dependencies = [ "iovec", "libc", - "mio", + "mio 0.6.23", ] [[package]] @@ -1484,16 +1545,35 @@ checksum = "80ae4a7688d1fab81c5bf19c64fc8db920be8d519ce6336ed4e7efe024724dbd" dependencies = [ "bitflags", "filetime", - "fsevent", - "fsevent-sys", - "inotify", + "fsevent 0.4.0", + "fsevent-sys 2.0.1", + "inotify 0.7.1", "libc", - "mio", + "mio 0.6.23", "mio-extras", "walkdir", "winapi 0.3.9", ] +[[package]] +name = "notify" +version = "5.0.0-pre.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5fd82b93434edb9c00ae65ee741e0e081cdc8c63346ab9f687935a629aaf4c3" +dependencies = [ + "anymap", + "bitflags", + "crossbeam-channel 0.5.0", + "filetime", + "fsevent 2.0.2", + "fsevent-sys 3.0.2", + "inotify 0.9.2", + "libc", + "mio 0.7.9", + "walkdir", + "winapi 0.3.9", +] + [[package]] name = "ntapi" version = "0.3.6" @@ -1561,7 +1641,7 @@ dependencies = [ "lazy_static", "log", "nix 0.19.1", - "notify", + "notify 4.0.15", "onefuzz-telemetry", "pete", "proc-maps", @@ -1579,7 +1659,7 @@ dependencies = [ "storage-queue", "strum", "strum_macros", - "sysinfo", + "sysinfo 0.16.4", "tempfile", "tokio", "tokio-util", @@ -1596,6 +1676,7 @@ dependencies = [ "anyhow", "appinsights", "async-trait", + "backoff", "clap", "env_logger", "futures", @@ -1605,6 +1686,7 @@ dependencies = [ "num_cpus", "onefuzz", "onefuzz-telemetry", + "path-absolutize", "reqwest", "reqwest-retry", "serde", @@ -1730,6 +1812,24 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "path-absolutize" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6ab2aaa5faefed84db46e4398eab15fa51325606462b5da8b0e230af3ac59a" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658c6e985fce9c25289fe7c86c08a3cbe82c19a3cd5b3bc5945c8c632552e460" +dependencies = [ + "once_cell", +] + [[package]] name = "pdb" version = "0.6.0" @@ -1758,33 +1858,13 @@ dependencies = [ "thiserror", ] -[[package]] -name = "pin-project" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffbc8e94b38ea3d2d8ba92aea2983b503cd75d0888d75b86bb37970b5698e15" -dependencies = [ - "pin-project-internal 0.4.27", -] - [[package]] name = "pin-project" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96fa8ebb90271c4477f144354485b8068bd8f6b78b428b01ba892ca26caf0b63" dependencies = [ - "pin-project-internal 1.0.5", -] - -[[package]] -name = "pin-project-internal" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ad2ae56b6abe3a1ee25f15ee605bacadb9a764edaba9c2bf4103800d4a1895" -dependencies = [ - "proc-macro2 1.0.24", - "quote 1.0.9", - "syn 1.0.63", + "pin-project-internal", ] [[package]] @@ -1939,6 +2019,16 @@ dependencies = [ "libc", ] +[[package]] +name = "queue-file" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82adc209678e4bb644900ccf43dc11e041a940e1f0c332a66985d01e02ca7451" +dependencies = [ + "bytes 0.5.6", + "snafu", +] + [[package]] name = "quote" version = "0.6.13" @@ -2141,6 +2231,7 @@ dependencies = [ "anyhow", "async-trait", "backoff", + 
"log", "onefuzz-telemetry", "reqwest", "tokio", @@ -2380,6 +2471,27 @@ dependencies = [ "syn 0.15.44", ] +[[package]] +name = "snafu" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.9", + "syn 1.0.63", +] + [[package]] name = "socket2" version = "0.3.19" @@ -2409,7 +2521,11 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "backoff", "base64", + "bytes 0.5.6", + "derivative", + "queue-file", "regex", "reqwest", "reqwest-retry", @@ -2417,7 +2533,9 @@ dependencies = [ "serde-xml-rs", "serde_derive", "serde_json", + "tokio", "uuid", + "yaque", ] [[package]] @@ -2502,6 +2620,21 @@ dependencies = [ "unicode-xid 0.2.1", ] +[[package]] +name = "sysinfo" +version = "0.14.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2983daff11a197c7c406b130579bc362177aa54cf2cc1f34d6ac88fccaa6a5e1" +dependencies = [ + "cfg-if 0.1.10", + "doc-comment", + "libc", + "ntapi", + "once_cell", + "rayon", + "winapi 0.3.9", +] + [[package]] name = "sysinfo" version = "0.16.4" @@ -2608,7 +2741,7 @@ dependencies = [ "lazy_static", "libc", "memchr", - "mio", + "mio 0.6.23", "mio-named-pipes", "mio-uds", "num_cpus", @@ -2688,7 +2821,7 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" dependencies = [ - "pin-project 1.0.5", + "pin-project", "tracing", ] @@ -3070,6 +3203,20 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a" +[[package]] +name = "yaque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "543707de19373df21757dc231c46407701d0b05a8067542584ea5c6fa8602725" +dependencies = [ + "futures", + "lazy_static", + "log", + "notify 5.0.0-pre.6", + "rand 0.7.3", + "sysinfo 0.14.15", +] + [[package]] name = "z3-sys" version = "0.6.3" diff --git a/src/agent/onefuzz-agent/Cargo.toml b/src/agent/onefuzz-agent/Cargo.toml index 582943ea4..5c5e3625a 100644 --- a/src/agent/onefuzz-agent/Cargo.toml +++ b/src/agent/onefuzz-agent/Cargo.toml @@ -13,6 +13,7 @@ integration_test=[] anyhow = "1.0" appinsights = "0.1" async-trait = "0.1" +backoff = { version = "0.3", features = ["async-std"] } clap = "2.33" tempfile = "3.2" env_logger = "0.8" @@ -32,6 +33,7 @@ onefuzz = { path = "../onefuzz" } storage-queue = { path = "../storage-queue" } reqwest-retry = { path = "../reqwest-retry" } onefuzz-telemetry = { path = "../onefuzz-telemetry" } +path-absolutize = "3.0.6" [dev-dependencies] tempfile = "3.2" diff --git a/src/agent/onefuzz-agent/src/debug/cmd.rs b/src/agent/onefuzz-agent/src/debug/cmd.rs deleted file mode 100644 index 15cf6d0ab..000000000 --- a/src/agent/onefuzz-agent/src/debug/cmd.rs +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -use anyhow::Result; -use clap::{App, SubCommand}; - -use crate::{debug::libfuzzer_merge, local::common::add_common_config}; - -const LIBFUZZER_MERGE: &str = "libfuzzer-merge"; - -pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - match args.subcommand() { - (LIBFUZZER_MERGE, Some(sub)) => libfuzzer_merge::run(sub).await, - _ => { - anyhow::bail!("missing subcommand\nUSAGE: {}", args.usage()); - } - } -} - -pub fn args(name: &str) -> App<'static, 'static> { - SubCommand::with_name(name) - .about("unsupported internal debugging commands") - .subcommand(add_common_config(libfuzzer_merge::args(LIBFUZZER_MERGE))) -} diff --git a/src/agent/onefuzz-agent/src/debug/libfuzzer_merge.rs b/src/agent/onefuzz-agent/src/debug/libfuzzer_merge.rs deleted file mode 100644 index 7d396fd44..000000000 --- a/src/agent/onefuzz-agent/src/debug/libfuzzer_merge.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -use crate::{ - local::common::{ - add_cmd_options, build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, CmdType, - }, - tasks::merge::libfuzzer_merge::{merge_inputs, Config}, -}; -use anyhow::Result; -use clap::{App, Arg, SubCommand}; -use onefuzz::syncdir::SyncedDir; -use std::sync::Arc; - -pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); - let target_env = get_cmd_env(CmdType::Target, args)?; - let target_options = get_cmd_arg(CmdType::Target, args); - - let inputs = value_t!(args, "inputs", String)?; - let unique_inputs = value_t!(args, "unique_inputs", String)?; - let check_fuzzer_help = false; - - let common = build_common_config(args)?; - let config = Arc::new(Config { - target_exe, - target_env, - target_options, - check_fuzzer_help, - input_queue: None, - inputs: vec![SyncedDir { - path: inputs.into(), - url: None, - }], - unique_inputs: SyncedDir { - path: unique_inputs.into(), - url: None, - }, - common, - preserve_existing_outputs: true, - }); - - let results = merge_inputs(config.clone(), vec![config.clone().inputs[0].path.clone()]).await?; - println!("{:#?}", results); - Ok(()) -} - -pub fn args(name: &'static str) -> App<'static, 'static> { - let mut app = SubCommand::with_name(name).about("execute a local-only libfuzzer merge task"); - - app = add_cmd_options(CmdType::Target, true, true, true, app); - app.arg(Arg::with_name("inputs").takes_value(true).required(true)) - .arg( - Arg::with_name("unique_inputs") - .takes_value(true) - .required(true), - ) -} diff --git a/src/agent/onefuzz-agent/src/debug/mod.rs b/src/agent/onefuzz-agent/src/debug/mod.rs deleted file mode 100644 index 0a7fefd34..000000000 --- a/src/agent/onefuzz-agent/src/debug/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -pub mod cmd; -pub mod libfuzzer_merge; diff --git a/src/agent/onefuzz-agent/src/local/cmd.rs b/src/agent/onefuzz-agent/src/local/cmd.rs index 4bf17e400..c20a5bca7 100644 --- a/src/agent/onefuzz-agent/src/local/cmd.rs +++ b/src/agent/onefuzz-agent/src/local/cmd.rs @@ -5,8 +5,9 @@ use anyhow::Result; use clap::{App, SubCommand}; use crate::local::{ - common::add_common_config, generic_crash_report, generic_generator, libfuzzer, - libfuzzer_coverage, libfuzzer_crash_report, libfuzzer_fuzz, radamsa, + common::add_common_config, generic_analysis, generic_crash_report, generic_generator, + libfuzzer, libfuzzer_coverage, libfuzzer_crash_report, libfuzzer_fuzz, libfuzzer_merge, + radamsa, }; const RADAMSA: &str = "radamsa"; @@ -14,8 +15,10 @@ const LIBFUZZER: &str = "libfuzzer"; const LIBFUZZER_FUZZ: &str = "libfuzzer-fuzz"; const LIBFUZZER_CRASH_REPORT: &str = "libfuzzer-crash-report"; const LIBFUZZER_COVERAGE: &str = "libfuzzer-coverage"; +const LIBFUZZER_MERGE: &str = "libfuzzer-merge"; const GENERIC_CRASH_REPORT: &str = "generic-crash-report"; const GENERIC_GENERATOR: &str = "generic-generator"; +const GENERIC_ANALYSIS: &str = "generic-analysis"; pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { match args.subcommand() { @@ -24,6 +27,8 @@ pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { (LIBFUZZER_FUZZ, Some(sub)) => libfuzzer_fuzz::run(sub).await, (LIBFUZZER_COVERAGE, Some(sub)) => libfuzzer_coverage::run(sub).await, (LIBFUZZER_CRASH_REPORT, Some(sub)) => libfuzzer_crash_report::run(sub).await, + (LIBFUZZER_MERGE, Some(sub)) => libfuzzer_merge::run(sub).await, + (GENERIC_ANALYSIS, Some(sub)) => generic_analysis::run(sub).await, (GENERIC_CRASH_REPORT, Some(sub)) => generic_crash_report::run(sub).await, (GENERIC_GENERATOR, Some(sub)) => generic_generator::run(sub).await, _ => { @@ -41,6 +46,7 @@ pub fn args(name: &str) -> App<'static, 'static> { .subcommand(add_common_config(libfuzzer_coverage::args( LIBFUZZER_COVERAGE, ))) + .subcommand(add_common_config(libfuzzer_merge::args(LIBFUZZER_MERGE))) .subcommand(add_common_config(libfuzzer_crash_report::args( LIBFUZZER_CRASH_REPORT, ))) @@ -50,4 +56,5 @@ pub fn args(name: &str) -> App<'static, 'static> { .subcommand(add_common_config(generic_generator::args( GENERIC_GENERATOR, ))) + .subcommand(add_common_config(generic_analysis::args(GENERIC_ANALYSIS))) } diff --git a/src/agent/onefuzz-agent/src/local/common.rs b/src/agent/onefuzz-agent/src/local/common.rs index bcced4abc..68c4a5505 100644 --- a/src/agent/onefuzz-agent/src/local/common.rs +++ b/src/agent/onefuzz-agent/src/local/common.rs @@ -2,10 +2,20 @@ use crate::tasks::config::CommonConfig; use crate::tasks::utils::parse_key_value; use anyhow::Result; use clap::{App, Arg, ArgMatches}; -use std::{collections::HashMap, path::PathBuf}; - +use onefuzz::jitter::delay_with_jitter; +use onefuzz::{blob::BlobContainerUrl, monitor::DirectoryMonitor, syncdir::SyncedDir}; +use reqwest::Url; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, + time::Duration, +}; use uuid::Uuid; +use backoff::{future::retry, Error as BackoffError, ExponentialBackoff}; +use path_absolutize::Absolutize; +use std::task::Poll; + pub const SETUP_DIR: &str = "setup_dir"; pub const INPUTS_DIR: &str = "inputs_dir"; pub const CRASHES_DIR: &str = "crashes_dir"; @@ -33,46 +43,30 @@ pub const GENERATOR_EXE: &str = "generator_exe"; pub const GENERATOR_ENV: &str = "generator_env"; pub const GENERATOR_OPTIONS: &str = "generator_options"; +pub const ANALYZER_EXE: &str = "analyzer_exe"; +pub const 
ANALYZER_OPTIONS: &str = "analyzer_options"; +pub const ANALYZER_ENV: &str = "analyzer_env"; +pub const ANALYSIS_DIR: &str = "analysis_dir"; +pub const ANALYSIS_INPUTS: &str = "analysis_inputs"; +pub const ANALYSIS_UNIQUE_INPUTS: &str = "analysis_unique_inputs"; +pub const PRESERVE_EXISTING_OUTPUTS: &str = "preserve_existing_outputs"; + +const WAIT_FOR_MAX_WAIT: Duration = Duration::from_secs(10); +const WAIT_FOR_DIR_DELAY: Duration = Duration::from_secs(1); + pub enum CmdType { Target, Generator, // Supervisor, } -pub fn add_cmd_options( - cmd_type: CmdType, - exe: bool, - arg: bool, - env: bool, - mut app: App<'static, 'static>, -) -> App<'static, 'static> { - let (exe_name, env_name, arg_name) = match cmd_type { - CmdType::Target => (TARGET_EXE, TARGET_ENV, TARGET_OPTIONS), - // CmdType::Supervisor => (SUPERVISOR_EXE, SUPERVISOR_ENV, SUPERVISOR_OPTIONS), - CmdType::Generator => (GENERATOR_EXE, GENERATOR_ENV, GENERATOR_OPTIONS), - }; - - if exe { - app = app.arg(Arg::with_name(exe_name).takes_value(true).required(true)); +pub fn get_hash_map(args: &clap::ArgMatches<'_>, name: &str) -> Result<HashMap<String, String>> { + let mut env = HashMap::new(); + for opt in args.values_of_lossy(name).unwrap_or_default() { + let (k, v) = parse_key_value(opt)?; + env.insert(k, v); } - if env { - app = app.arg( - Arg::with_name(env_name) - .long(env_name) - .takes_value(true) - .multiple(true), - ) - } - if arg { - app = app.arg( - Arg::with_name(arg_name) - .long(arg_name) - .takes_value(true) - .value_delimiter(" ") - .help("Use a quoted string with space separation to denote multiple arguments"), - ) - } - app + Ok(env) } pub fn get_cmd_exe(cmd_type: CmdType, args: &clap::ArgMatches<'_>) -> Result<String> { @@ -105,13 +99,7 @@ pub fn get_cmd_env( // CmdType::Supervisor => SUPERVISOR_ENV, CmdType::Generator => GENERATOR_ENV, }; - - let mut env = HashMap::new(); - for opt in args.values_of_lossy(env_name).unwrap_or_default() { - let (k, v) = parse_key_value(opt)?; - env.insert(k, v); - } - Ok(env) + get_hash_map(args, env_name) } pub fn add_common_config(app: App<'static, 'static>) -> App<'static, 'static> { @@ -142,17 +130,56 @@ pub fn add_common_config(app: App<'static, 'static>) -> App<'static, 'static> { } fn get_uuid(name: &str, args: &ArgMatches<'_>) -> Result<Uuid> { - match value_t!(args, name, String) { - Ok(x) => Uuid::parse_str(&x) - .map_err(|x| format_err!("invalid {}. uuid expected. {})", name, x)), - Err(_) => Ok(Uuid::nil()), - } + value_t!(args, name, String).map(|x| { + Uuid::parse_str(&x).map_err(|x| format_err!("invalid {}. uuid expected. {})", name, x)) + })? +} + +pub fn get_synced_dirs( + name: &str, + job_id: Uuid, + task_id: Uuid, + args: &ArgMatches<'_>, +) -> Result<Vec<SyncedDir>> { + let current_dir = std::env::current_dir()?; + let dirs: Result<Vec<SyncedDir>> = value_t!(args, name, PathBuf)? + .iter() + .enumerate() + .map(|(index, remote_path)| { + let path = PathBuf::from(remote_path); + let remote_path = path.absolutize()?; + let remote_url = Url::from_file_path(remote_path).expect("invalid file path"); + let remote_blob_url = BlobContainerUrl::new(remote_url).expect("invalid url"); + let path = current_dir.join(format!("{}/{}/{}_{}", job_id, task_id, name, index)); + Ok(SyncedDir { + url: remote_blob_url, + path, + }) + }) + .collect(); + Ok(dirs?) 
+} + +pub fn get_synced_dir( + name: &str, + job_id: Uuid, + task_id: Uuid, + args: &ArgMatches<'_>, +) -> Result<SyncedDir> { + let remote_path = value_t!(args, name, PathBuf)?.absolutize()?.into_owned(); + let remote_url = Url::from_file_path(remote_path).map_err(|_| anyhow!("invalid file path"))?; + let remote_blob_url = BlobContainerUrl::new(remote_url)?; + let path = std::env::current_dir()?.join(format!("{}/{}/{}", job_id, task_id, name)); + Ok(SyncedDir { + url: remote_blob_url, + path, + }) } pub fn build_common_config(args: &ArgMatches<'_>) -> Result<CommonConfig> { - let job_id = get_uuid("job_id", args)?; - let task_id = get_uuid("task_id", args)?; - let instance_id = get_uuid("instance_id", args)?; + let job_id = get_uuid("job_id", args).unwrap_or_else(|_| Uuid::nil()); + let task_id = get_uuid("task_id", args).unwrap_or_else(|_| Uuid::new_v4()); + let instance_id = get_uuid("instance_id", args).unwrap_or_else(|_| Uuid::nil()); let setup_dir = if args.is_present(SETUP_DIR) { value_t!(args, SETUP_DIR, PathBuf)? @@ -174,3 +201,67 @@ pub fn build_common_config(args: &ArgMatches<'_>) -> Result<CommonConfig> { }; Ok(config) } + +/// Information about a local path being monitored. +/// A new notification will be received on the queue url +/// for each new file added to the directory. +pub struct DirectoryMonitorQueue { + pub directory_path: PathBuf, + pub queue_client: storage_queue::QueueClient, + pub handle: tokio::task::JoinHandle<Result<()>>, +} + +impl DirectoryMonitorQueue { + pub async fn start_monitoring(directory_path: impl AsRef<Path>) -> Result<Self> { + let directory_path = PathBuf::from(directory_path.as_ref()); + let directory_path_clone = directory_path.clone(); + let queue_client = storage_queue::QueueClient::Channel( + storage_queue::local_queue::ChannelQueueClient::new()?, + ); + let queue = queue_client.clone(); + let handle: tokio::task::JoinHandle<Result<()>> = tokio::spawn(async move { + let mut monitor = DirectoryMonitor::new(directory_path_clone.clone()); + monitor.start()?; + loop { + match monitor.poll_file() { + Poll::Ready(Some(file_path)) => { + let file_url = Url::from_file_path(file_path) + .map_err(|_| anyhow!("invalid file path"))?; + queue.enqueue(file_url).await?; + } + Poll::Ready(None) => break, + Poll::Pending => delay_with_jitter(Duration::from_secs(1)).await, + } + } + Ok(()) + }); + + Ok(DirectoryMonitorQueue { + directory_path, + queue_client, + handle, + }) + } +} + +pub async fn wait_for_dir(path: impl AsRef<Path>) -> Result<()> { + let op = || async { + if path.as_ref().exists() { + Ok(()) + } else { + Err(BackoffError::Transient(anyhow::anyhow!( + "path '{:?}' does not exist", + path.as_ref() + ))) + } + }; + retry( + ExponentialBackoff { + max_elapsed_time: Some(WAIT_FOR_MAX_WAIT), + max_interval: WAIT_FOR_DIR_DELAY, + ..ExponentialBackoff::default() + }, + op, + ) + .await +} diff --git a/src/agent/onefuzz-agent/src/local/generic_analysis.rs b/src/agent/onefuzz-agent/src/local/generic_analysis.rs new file mode 100644 index 000000000..f7ff604ba --- /dev/null +++ b/src/agent/onefuzz-agent/src/local/generic_analysis.rs @@ -0,0 +1,104 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
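The two helpers added to `local/common.rs` above, `DirectoryMonitorQueue` and `wait_for_dir`, are what let the local subcommands emulate managed tasks: new files in a watched directory are re-published as `file://` URLs on an in-memory queue, and `wait_for_dir` retries with exponential backoff (capped at `WAIT_FOR_MAX_WAIT`) until the fuzz task has created its output directory. A minimal usage sketch, assuming those helpers are in scope and a tokio runtime; the `demo` function is illustrative, not part of this diff:

```rust
use anyhow::Result;
use std::path::PathBuf;

// Hypothetical driver; local/libfuzzer.rs below wires the same pieces
// into real report/coverage/analysis tasks.
async fn demo(crash_dir: PathBuf) -> Result<()> {
    // Retry (with exponential backoff, capped by WAIT_FOR_MAX_WAIT) until
    // the fuzz task has created its crash directory.
    wait_for_dir(&crash_dir).await?;

    // Watch the directory; every new file is enqueued as a file:// URL on
    // an in-memory channel queue.
    let monitor = DirectoryMonitorQueue::start_monitoring(&crash_dir).await?;

    // A consumer task pops URLs off the queue and acknowledges them.
    if let Some(message) = monitor.queue_client.pop().await? {
        message.delete().await?;
    }

    // monitor.handle is a JoinHandle<Result<()>> for the watcher itself.
    Ok(())
}
```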
+ +use crate::{ + local::common::{ + build_common_config, get_cmd_arg, get_cmd_exe, get_hash_map, get_synced_dir, CmdType, + ANALYSIS_DIR, ANALYZER_ENV, ANALYZER_EXE, ANALYZER_OPTIONS, CRASHES_DIR, NO_REPRO_DIR, + REPORTS_DIR, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TOOLS_DIR, UNIQUE_REPORTS_DIR, + }, + tasks::{ + analysis::generic::{run as run_analysis, Config}, + config::CommonConfig, + }, +}; +use anyhow::Result; +use clap::{App, Arg, SubCommand}; +use storage_queue::QueueClient; + +pub fn build_analysis_config( + args: &clap::ArgMatches<'_>, + input_queue: Option<QueueClient>, + common: CommonConfig, +) -> Result<Config> { + let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); + let target_options = get_cmd_arg(CmdType::Target, args); + + let analyzer_exe = value_t!(args, ANALYZER_EXE, String)?; + let analyzer_options = args.values_of_lossy(ANALYZER_OPTIONS).unwrap_or_default(); + let analyzer_env = get_hash_map(args, ANALYZER_ENV)?; + let analysis = get_synced_dir(ANALYSIS_DIR, common.job_id, common.task_id, args)?; + let tools = get_synced_dir(TOOLS_DIR, common.job_id, common.task_id, args)?; + let crashes = get_synced_dir(CRASHES_DIR, common.job_id, common.task_id, args).ok(); + let reports = get_synced_dir(REPORTS_DIR, common.job_id, common.task_id, args).ok(); + let no_repro = get_synced_dir(NO_REPRO_DIR, common.job_id, common.task_id, args).ok(); + let unique_reports = + get_synced_dir(UNIQUE_REPORTS_DIR, common.job_id, common.task_id, args).ok(); + + let config = Config { + target_exe, + target_options, + crashes, + input_queue, + analyzer_exe, + analyzer_options, + analyzer_env, + analysis, + tools, + common, + reports, + unique_reports, + no_repro, + }; + + Ok(config) +} + +pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { + let common = build_common_config(args)?; + let config = build_analysis_config(args, None, common)?; + run_analysis(config).await +} + +pub fn build_shared_args() -> Vec<Arg<'static, 'static>> { + vec![ + Arg::with_name(TARGET_EXE) + .long(TARGET_EXE) + .takes_value(true) + .required(true), + Arg::with_name(TARGET_ENV) + .long(TARGET_ENV) + .takes_value(true) + .multiple(true), + Arg::with_name(TARGET_OPTIONS) + .default_value("{input}") + .long(TARGET_OPTIONS) + .takes_value(true) + .value_delimiter(" ") + .help("Use a quoted string with space separation to denote multiple arguments"), + Arg::with_name(CRASHES_DIR) + .long(CRASHES_DIR) + .takes_value(true) + .required(true), + Arg::with_name(ANALYZER_EXE) + .takes_value(true) + .required(true), + Arg::with_name(ANALYZER_OPTIONS) + .takes_value(true) + .value_delimiter(" ") + .help("Use a quoted string with space separation to denote multiple arguments"), + Arg::with_name(ANALYZER_ENV) + .takes_value(true) + .multiple(true), + Arg::with_name(ANALYSIS_DIR) + .takes_value(true) + .required(true), + Arg::with_name(TOOLS_DIR).takes_value(true).required(false), + ] +} + +pub fn args(name: &'static str) -> App<'static, 'static> { + SubCommand::with_name(name) + .about("execute a local-only generic analysis") + .args(&build_shared_args()) +} diff --git a/src/agent/onefuzz-agent/src/local/generic_crash_report.rs b/src/agent/onefuzz-agent/src/local/generic_crash_report.rs index fb19eb174..5ef046e64 100644 --- a/src/agent/onefuzz-agent/src/local/generic_crash_report.rs +++ b/src/agent/onefuzz-agent/src/local/generic_crash_report.rs @@ -3,33 +3,43 @@ use crate::{ local::common::{ - build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, CmdType, CHECK_ASAN_LOG, - CHECK_RETRY_COUNT, CRASHES_DIR, DISABLE_CHECK_QUEUE, NO_REPRO_DIR, 
REPORTS_DIR, TARGET_ENV, - TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, UNIQUE_REPORTS_DIR, + build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, CmdType, + CHECK_ASAN_LOG, CHECK_RETRY_COUNT, CRASHES_DIR, DISABLE_CHECK_QUEUE, NO_REPRO_DIR, + REPORTS_DIR, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, UNIQUE_REPORTS_DIR, + }, + tasks::{ + config::CommonConfig, + report::generic::{Config, ReportTask}, }, - tasks::report::generic::{Config, ReportTask}, }; use anyhow::Result; use clap::{App, Arg, SubCommand}; -use std::path::PathBuf; +use storage_queue::QueueClient; -pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { +pub fn build_report_config( + args: &clap::ArgMatches<'_>, + input_queue: Option<QueueClient>, + common: CommonConfig, +) -> Result<Config> { let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); let target_env = get_cmd_env(CmdType::Target, args)?; let target_options = get_cmd_arg(CmdType::Target, args); - let crashes = Some(value_t!(args, CRASHES_DIR, PathBuf)?.into()); - let reports = if args.is_present(REPORTS_DIR) { - Some(value_t!(args, REPORTS_DIR, PathBuf)?).map(|x| x.into()) - } else { - None - }; - let no_repro = if args.is_present(NO_REPRO_DIR) { - Some(value_t!(args, NO_REPRO_DIR, PathBuf)?).map(|x| x.into()) - } else { - None - }; - let unique_reports = Some(value_t!(args, UNIQUE_REPORTS_DIR, PathBuf)?.into()); + let crashes = Some(get_synced_dir( + CRASHES_DIR, + common.job_id, + common.task_id, + args, + )?); + let reports = get_synced_dir(REPORTS_DIR, common.job_id, common.task_id, args).ok(); + let no_repro = get_synced_dir(NO_REPRO_DIR, common.job_id, common.task_id, args).ok(); + + let unique_reports = Some(get_synced_dir( + UNIQUE_REPORTS_DIR, + common.job_id, + common.task_id, + args, + )?); let target_timeout = value_t!(args, TARGET_TIMEOUT, u64).ok(); @@ -38,8 +48,6 @@ pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { let check_asan_log = args.is_present(CHECK_ASAN_LOG); let check_debugger = !args.is_present("disable_check_debugger"); - let common = build_common_config(args)?; - let config = Config { target_exe, target_env, @@ -50,7 +58,7 @@ pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { check_retry_count, check_queue, crashes, - input_queue: None, + input_queue, no_repro, reports, unique_reports, @@ -61,8 +69,9 @@ pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { } pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let config = build_report_config(args)?; - ReportTask::new(config).local_run().await + let common = build_common_config(args)?; + let config = build_report_config(args, None, common)?; + ReportTask::new(config).managed_run().await } pub fn build_shared_args() -> Vec<Arg<'static, 'static>> { diff --git a/src/agent/onefuzz-agent/src/local/generic_generator.rs b/src/agent/onefuzz-agent/src/local/generic_generator.rs index e45207866..90c0d42a1 100644 --- a/src/agent/onefuzz-agent/src/local/generic_generator.rs +++ b/src/agent/onefuzz-agent/src/local/generic_generator.rs @@ -3,19 +3,21 @@ use crate::{ local::common::{ - build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, CmdType, CHECK_ASAN_LOG, - CHECK_RETRY_COUNT, CRASHES_DIR, GENERATOR_ENV, GENERATOR_EXE, GENERATOR_OPTIONS, - READONLY_INPUTS, RENAME_OUTPUT, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, - TOOLS_DIR, + build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, + get_synced_dirs, CmdType, CHECK_ASAN_LOG, CHECK_RETRY_COUNT, CRASHES_DIR, GENERATOR_ENV, + GENERATOR_EXE, 
GENERATOR_OPTIONS, READONLY_INPUTS, RENAME_OUTPUT, TARGET_ENV, TARGET_EXE, + TARGET_OPTIONS, TARGET_TIMEOUT, TOOLS_DIR, + }, + tasks::{ + config::CommonConfig, + fuzz::generator::{Config, GeneratorTask}, + }, - tasks::fuzz::generator::{Config, GeneratorTask}, }; use anyhow::Result; use clap::{App, Arg, SubCommand}; -use std::path::PathBuf; -pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { - let crashes = value_t!(args, CRASHES_DIR, PathBuf)?.into(); +pub fn build_fuzz_config(args: &clap::ArgMatches<'_>, common: CommonConfig) -> Result<Config> { + let crashes = get_synced_dir(CRASHES_DIR, common.job_id, common.task_id, args)?; let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); let target_options = get_cmd_arg(CmdType::Target, args); let target_env = get_cmd_env(CmdType::Target, args)?; @@ -23,11 +25,7 @@ pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { let generator_exe = get_cmd_exe(CmdType::Generator, args)?; let generator_options = get_cmd_arg(CmdType::Generator, args); let generator_env = get_cmd_env(CmdType::Generator, args)?; - - let readonly_inputs = values_t!(args, READONLY_INPUTS, PathBuf)? - .iter() - .map(|x| x.to_owned().into()) - .collect(); + let readonly_inputs = get_synced_dirs(READONLY_INPUTS, common.job_id, common.task_id, args)?; let rename_output = args.is_present(RENAME_OUTPUT); let check_asan_log = args.is_present(CHECK_ASAN_LOG); @@ -35,14 +33,10 @@ pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { let check_retry_count = value_t!(args, CHECK_RETRY_COUNT, u64)?; let target_timeout = Some(value_t!(args, TARGET_TIMEOUT, u64)?); - let tools = if args.is_present(TOOLS_DIR) { - Some(value_t!(args, TOOLS_DIR, PathBuf)?.into()) - } else { - None - }; + let tools = get_synced_dir(TOOLS_DIR, common.job_id, common.task_id, args).ok(); let ensemble_sync_delay = None; - let common = build_common_config(args)?; + let config = Config { tools, generator_exe, @@ -66,7 +60,8 @@ pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { } pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let config = build_fuzz_config(args)?; + let common = build_common_config(args)?; + let config = build_fuzz_config(args, common)?; GeneratorTask::new(config).run().await } diff --git a/src/agent/onefuzz-agent/src/local/libfuzzer.rs b/src/agent/onefuzz-agent/src/local/libfuzzer.rs index 2f3c2e9cb..5ea7c5881 100644 --- a/src/agent/onefuzz-agent/src/local/libfuzzer.rs +++ b/src/agent/onefuzz-agent/src/local/libfuzzer.rs @@ -3,46 +3,102 @@ use crate::{ local::{ - common::COVERAGE_DIR, + common::{ + build_common_config, wait_for_dir, DirectoryMonitorQueue, ANALYZER_EXE, COVERAGE_DIR, + UNIQUE_REPORTS_DIR, + }, + generic_analysis::build_analysis_config, libfuzzer_coverage::{build_coverage_config, build_shared_args as build_coverage_args}, libfuzzer_crash_report::{build_report_config, build_shared_args as build_crash_args}, libfuzzer_fuzz::{build_fuzz_config, build_shared_args as build_fuzz_args}, }, tasks::{ + analysis::generic::run as run_analysis, config::CommonConfig, coverage::libfuzzer_coverage::CoverageTask, fuzz::libfuzzer_fuzz::LibFuzzerFuzzTask, report::libfuzzer_report::ReportTask, }, }; use anyhow::Result; use clap::{App, SubCommand}; + +use onefuzz::utils::try_wait_all_join_handles; use std::collections::HashSet; use tokio::task::spawn; +use uuid::Uuid; pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let fuzz_config = build_fuzz_config(args)?; + let common = build_common_config(args)?; + let fuzz_config = 
build_fuzz_config(args, common.clone())?; + let crash_dir = fuzz_config + .crashes + .url + .as_file_path() + .expect("invalid crash dir remote location"); + let fuzzer = LibFuzzerFuzzTask::new(fuzz_config)?; fuzzer.check_libfuzzer().await?; - let fuzz_task = spawn(async move { fuzzer.run().await }); + let mut task_handles = vec![]; - let report_config = build_report_config(args)?; - let report = ReportTask::new(report_config); - let report_task = spawn(async move { report.local_run().await }); + let fuzz_task = spawn(async move { fuzzer.managed_run().await }); + + wait_for_dir(&crash_dir).await?; + + task_handles.push(fuzz_task); + if args.is_present(UNIQUE_REPORTS_DIR) { + let crash_report_input_monitor = + DirectoryMonitorQueue::start_monitoring(crash_dir.clone()).await?; + + let report_config = build_report_config( + args, + Some(crash_report_input_monitor.queue_client), + CommonConfig { + task_id: Uuid::new_v4(), + ..common.clone() + }, + )?; + let mut report = ReportTask::new(report_config); + let report_task = spawn(async move { report.managed_run().await }); + task_handles.push(report_task); + task_handles.push(crash_report_input_monitor.handle); + } if args.is_present(COVERAGE_DIR) { - let coverage_config = build_coverage_config(args, true)?; - let coverage = CoverageTask::new(coverage_config); - let coverage_task = spawn(async move { coverage.local_run().await }); + let coverage_input_monitor = + DirectoryMonitorQueue::start_monitoring(crash_dir.clone()).await?; + let coverage_config = build_coverage_config( + args, + true, + Some(coverage_input_monitor.queue_client), + CommonConfig { + task_id: Uuid::new_v4(), + ..common.clone() + }, + )?; + let mut coverage = CoverageTask::new(coverage_config); + let coverage_task = spawn(async move { coverage.managed_run().await }); - let result = tokio::try_join!(fuzz_task, report_task, coverage_task)?; - result.0?; - result.1?; - result.2?; - } else { - let result = tokio::try_join!(fuzz_task, report_task)?; - result.0?; - result.1?; + task_handles.push(coverage_task); + task_handles.push(coverage_input_monitor.handle); } + if args.is_present(ANALYZER_EXE) { + let analysis_input_monitor = DirectoryMonitorQueue::start_monitoring(crash_dir).await?; + let analysis_config = build_analysis_config( + args, + Some(analysis_input_monitor.queue_client), + CommonConfig { + task_id: Uuid::new_v4(), + ..common + }, + )?; + let analysis_task = spawn(async move { run_analysis(analysis_config).await }); + + task_handles.push(analysis_task); + task_handles.push(analysis_input_monitor.handle); + } + + try_wait_all_join_handles(task_handles).await?; + Ok(()) } diff --git a/src/agent/onefuzz-agent/src/local/libfuzzer_coverage.rs b/src/agent/onefuzz-agent/src/local/libfuzzer_coverage.rs index 09ed828ae..9ae385f92 100644 --- a/src/agent/onefuzz-agent/src/local/libfuzzer_coverage.rs +++ b/src/agent/onefuzz-agent/src/local/libfuzzer_coverage.rs @@ -3,39 +3,49 @@ use crate::{ local::common::{ - build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, CmdType, CHECK_FUZZER_HELP, - COVERAGE_DIR, INPUTS_DIR, READONLY_INPUTS, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, + build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, + get_synced_dirs, CmdType, CHECK_FUZZER_HELP, COVERAGE_DIR, INPUTS_DIR, READONLY_INPUTS, + TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, + }, + tasks::{ + config::CommonConfig, + coverage::libfuzzer_coverage::{Config, CoverageTask}, }, - tasks::coverage::libfuzzer_coverage::{Config, CoverageTask}, }; use anyhow::Result; use 
clap::{App, Arg, SubCommand}; -use std::path::PathBuf; +use storage_queue::QueueClient; -pub fn build_coverage_config(args: &clap::ArgMatches<'_>, local_job: bool) -> Result<Config> { +pub fn build_coverage_config( + args: &clap::ArgMatches<'_>, + local_job: bool, + input_queue: Option<QueueClient>, + common: CommonConfig, +) -> Result<Config> { let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); let target_env = get_cmd_env(CmdType::Target, args)?; let target_options = get_cmd_arg(CmdType::Target, args); let readonly_inputs = if local_job { - vec![value_t!(args, INPUTS_DIR, PathBuf)?.into()] + vec![get_synced_dir( + INPUTS_DIR, + common.job_id, + common.task_id, + args, + )?] } else { - values_t!(args, READONLY_INPUTS, PathBuf)? - .iter() - .map(|x| x.to_owned().into()) - .collect() + get_synced_dirs(READONLY_INPUTS, common.job_id, common.task_id, args)? }; - let coverage = value_t!(args, COVERAGE_DIR, PathBuf)?.into(); + let coverage = get_synced_dir(COVERAGE_DIR, common.job_id, common.task_id, args)?; let check_fuzzer_help = args.is_present(CHECK_FUZZER_HELP); - let common = build_common_config(args)?; let config = Config { target_exe, target_env, target_options, check_fuzzer_help, - input_queue: None, + input_queue, readonly_inputs, coverage, common, @@ -45,10 +55,11 @@ pub fn build_coverage_config(args: &clap::ArgMatches<'_>, local_job: bool) -> Re } pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let config = build_coverage_config(args, false)?; + let common = build_common_config(args)?; + let config = build_coverage_config(args, false, None, common)?; - let task = CoverageTask::new(config); - task.local_run().await + let mut task = CoverageTask::new(config); + task.managed_run().await } pub fn build_shared_args(local_job: bool) -> Vec<Arg<'static, 'static>> { diff --git a/src/agent/onefuzz-agent/src/local/libfuzzer_crash_report.rs b/src/agent/onefuzz-agent/src/local/libfuzzer_crash_report.rs index a233b07d7..f9ddbd981 100644 --- a/src/agent/onefuzz-agent/src/local/libfuzzer_crash_report.rs +++ b/src/agent/onefuzz-agent/src/local/libfuzzer_crash_report.rs @@ -3,41 +3,46 @@ use crate::{ local::common::{ - build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, CmdType, CHECK_FUZZER_HELP, - CHECK_RETRY_COUNT, CRASHES_DIR, DISABLE_CHECK_QUEUE, NO_REPRO_DIR, REPORTS_DIR, TARGET_ENV, - TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, UNIQUE_REPORTS_DIR, + build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, CmdType, + CHECK_FUZZER_HELP, CHECK_RETRY_COUNT, CRASHES_DIR, DISABLE_CHECK_QUEUE, NO_REPRO_DIR, + REPORTS_DIR, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TARGET_TIMEOUT, UNIQUE_REPORTS_DIR, + }, + tasks::{ + config::CommonConfig, + report::libfuzzer_report::{Config, ReportTask}, }, - tasks::report::libfuzzer_report::{Config, ReportTask}, }; use anyhow::Result; use clap::{App, Arg, SubCommand}; -use std::path::PathBuf; +use storage_queue::QueueClient; -pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { +pub fn build_report_config( + args: &clap::ArgMatches<'_>, + input_queue: Option<QueueClient>, + common: CommonConfig, +) -> Result<Config> { let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); let target_env = get_cmd_env(CmdType::Target, args)?; let target_options = get_cmd_arg(CmdType::Target, args); - let crashes = Some(value_t!(args, CRASHES_DIR, PathBuf)?.into()); - let reports = if args.is_present(REPORTS_DIR) { - Some(value_t!(args, REPORTS_DIR, PathBuf)?).map(|x| x.into()) - } else { - None - }; - let no_repro = if args.is_present(NO_REPRO_DIR) { - Some(value_t!(args, 
NO_REPRO_DIR, PathBuf)?).map(|x| x.into()) - } else { - None - }; - let unique_reports = Some(value_t!(args, UNIQUE_REPORTS_DIR, PathBuf)?.into()); + let crashes = get_synced_dir(CRASHES_DIR, common.job_id, common.task_id, args).ok(); + let reports = get_synced_dir(REPORTS_DIR, common.job_id, common.task_id, args).ok(); + + let no_repro = get_synced_dir(NO_REPRO_DIR, common.job_id, common.task_id, args).ok(); + + let unique_reports = + get_synced_dir(UNIQUE_REPORTS_DIR, common.job_id, common.task_id, args).ok(); let target_timeout = value_t!(args, TARGET_TIMEOUT, u64).ok(); let check_retry_count = value_t!(args, CHECK_RETRY_COUNT, u64)?; + let check_queue = !args.is_present(DISABLE_CHECK_QUEUE); + let check_fuzzer_help = args.is_present(CHECK_FUZZER_HELP); - let common = build_common_config(args)?; + let crashes = if input_queue.is_none() { crashes } else { None }; + let config = Config { target_exe, target_env, @@ -45,7 +50,7 @@ pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { target_timeout, check_retry_count, check_fuzzer_help, - input_queue: None, + input_queue, check_queue, crashes, reports, @@ -57,8 +62,9 @@ pub fn build_report_config(args: &clap::ArgMatches<'_>) -> Result<Config> { } pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let config = build_report_config(args)?; - ReportTask::new(config).local_run().await + let common = build_common_config(args)?; + let config = build_report_config(args, None, common)?; + ReportTask::new(config).managed_run().await } pub fn build_shared_args() -> Vec<Arg<'static, 'static>> { diff --git a/src/agent/onefuzz-agent/src/local/libfuzzer_fuzz.rs b/src/agent/onefuzz-agent/src/local/libfuzzer_fuzz.rs index 0e859d23b..8882ed6fc 100644 --- a/src/agent/onefuzz-agent/src/local/libfuzzer_fuzz.rs +++ b/src/agent/onefuzz-agent/src/local/libfuzzer_fuzz.rs @@ -3,20 +3,23 @@ use crate::{ local::common::{ - build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, CmdType, CHECK_FUZZER_HELP, - CRASHES_DIR, INPUTS_DIR, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, TARGET_WORKERS, + build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, CmdType, + CHECK_FUZZER_HELP, CRASHES_DIR, INPUTS_DIR, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, + TARGET_WORKERS, + }, + tasks::{ + config::CommonConfig, + fuzz::libfuzzer_fuzz::{Config, LibFuzzerFuzzTask}, }, - tasks::fuzz::libfuzzer_fuzz::{Config, LibFuzzerFuzzTask}, }; use anyhow::Result; use clap::{App, Arg, SubCommand}; -use std::path::PathBuf; const DISABLE_EXPECT_CRASH_ON_FAILURE: &str = "disable_expect_crash_on_failure"; -pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { - let crashes = value_t!(args, CRASHES_DIR, PathBuf)?.into(); - let inputs = value_t!(args, INPUTS_DIR, PathBuf)?.into(); +pub fn build_fuzz_config(args: &clap::ArgMatches<'_>, common: CommonConfig) -> Result<Config> { + let crashes = get_synced_dir(CRASHES_DIR, common.job_id, common.task_id, args)?; + let inputs = get_synced_dir(INPUTS_DIR, common.job_id, common.task_id, args)?; let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); let target_env = get_cmd_env(CmdType::Target, args)?; @@ -28,7 +31,7 @@ pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { let expect_crash_on_failure = !args.is_present(DISABLE_EXPECT_CRASH_ON_FAILURE); let ensemble_sync_delay = None; - let common = build_common_config(args)?; + let config = Config { inputs, readonly_inputs, @@ -47,7 +50,8 @@ pub fn build_fuzz_config(args: &clap::ArgMatches<'_>) -> Result<Config> { } pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let 
config = build_fuzz_config(args)?; + let common = build_common_config(args)?; + let config = build_fuzz_config(args, common)?; LibFuzzerFuzzTask::new(config)?.run().await } diff --git a/src/agent/onefuzz-agent/src/local/libfuzzer_merge.rs b/src/agent/onefuzz-agent/src/local/libfuzzer_merge.rs new file mode 100644 index 000000000..4d0dcc31d --- /dev/null +++ b/src/agent/onefuzz-agent/src/local/libfuzzer_merge.rs @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use crate::{ + local::common::{ + build_common_config, get_cmd_arg, get_cmd_env, get_cmd_exe, get_synced_dir, + get_synced_dirs, CmdType, ANALYSIS_INPUTS, ANALYSIS_UNIQUE_INPUTS, CHECK_FUZZER_HELP, + INPUTS_DIR, PRESERVE_EXISTING_OUTPUTS, TARGET_ENV, TARGET_EXE, TARGET_OPTIONS, + }, + tasks::{ + config::CommonConfig, + merge::libfuzzer_merge::{spawn, Config}, + }, +}; +use anyhow::Result; +use clap::{App, Arg, SubCommand}; +use storage_queue::QueueClient; + +pub fn build_merge_config( + args: &clap::ArgMatches<'_>, + input_queue: Option<QueueClient>, + common: CommonConfig, +) -> Result<Config> { + let target_exe = get_cmd_exe(CmdType::Target, args)?.into(); + let target_env = get_cmd_env(CmdType::Target, args)?; + let target_options = get_cmd_arg(CmdType::Target, args); + let check_fuzzer_help = args.is_present(CHECK_FUZZER_HELP); + let inputs = get_synced_dirs(ANALYSIS_INPUTS, common.job_id, common.task_id, args)?; + let unique_inputs = + get_synced_dir(ANALYSIS_UNIQUE_INPUTS, common.job_id, common.task_id, args)?; + let preserve_existing_outputs = value_t!(args, PRESERVE_EXISTING_OUTPUTS, bool)?; + + let config = Config { + target_exe, + target_env, + target_options, + check_fuzzer_help, + input_queue, + common, + inputs, + unique_inputs, + preserve_existing_outputs, + }; + Ok(config) +} + +pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { + let common = build_common_config(args)?; + let config = build_merge_config(args, None, common)?; + spawn(std::sync::Arc::new(config)).await +} + +pub fn build_shared_args() -> Vec<Arg<'static, 'static>> { + vec![ + Arg::with_name(TARGET_EXE) + .long(TARGET_EXE) + .takes_value(true) + .required(true), + Arg::with_name(TARGET_ENV) + .long(TARGET_ENV) + .takes_value(true) + .multiple(true), + Arg::with_name(TARGET_OPTIONS) + .long(TARGET_OPTIONS) + .takes_value(true) + .value_delimiter(" ") + .help("Use a quoted string with space separation to denote multiple arguments"), + Arg::with_name(CHECK_FUZZER_HELP) + .takes_value(false) + .long(CHECK_FUZZER_HELP), + Arg::with_name(INPUTS_DIR) + .long(INPUTS_DIR) + .takes_value(true) + .multiple(true), + ] +} + +pub fn args(name: &'static str) -> App<'static, 'static> { + SubCommand::with_name(name) + .about("execute a local-only libfuzzer merge task") + .args(&build_shared_args()) +} diff --git a/src/agent/onefuzz-agent/src/local/mod.rs b/src/agent/onefuzz-agent/src/local/mod.rs index 7bb84bc57..4ed5c9bdd 100644 --- a/src/agent/onefuzz-agent/src/local/mod.rs +++ b/src/agent/onefuzz-agent/src/local/mod.rs @@ -3,10 +3,12 @@ pub mod cmd; pub mod common; +pub mod generic_analysis; pub mod generic_crash_report; pub mod generic_generator; pub mod libfuzzer; pub mod libfuzzer_coverage; pub mod libfuzzer_crash_report; pub mod libfuzzer_fuzz; +pub mod libfuzzer_merge; pub mod radamsa; diff --git a/src/agent/onefuzz-agent/src/local/radamsa.rs b/src/agent/onefuzz-agent/src/local/radamsa.rs index e7fda771f..b2a2ffb41 100644 --- a/src/agent/onefuzz-agent/src/local/radamsa.rs +++ b/src/agent/onefuzz-agent/src/local/radamsa.rs @@ -3,28 +3,48 @@ 
use crate::{ local::{ + common::{build_common_config, DirectoryMonitorQueue}, generic_crash_report::{build_report_config, build_shared_args as build_crash_args}, generic_generator::{build_fuzz_config, build_shared_args as build_fuzz_args}, }, - tasks::{fuzz::generator::GeneratorTask, report::generic::ReportTask}, + tasks::{config::CommonConfig, fuzz::generator::GeneratorTask, report::generic::ReportTask}, }; use anyhow::Result; use clap::{App, SubCommand}; +use onefuzz::utils::try_wait_all_join_handles; use std::collections::HashSet; use tokio::task::spawn; +use uuid::Uuid; pub async fn run(args: &clap::ArgMatches<'_>) -> Result<()> { - let fuzz_config = build_fuzz_config(args)?; + let common = build_common_config(args)?; + let fuzz_config = build_fuzz_config(args, common.clone())?; + let crash_dir = fuzz_config + .crashes + .url + .as_file_path() + .expect("invalid crash dir remote location"); + let fuzzer = GeneratorTask::new(fuzz_config); let fuzz_task = spawn(async move { fuzzer.run().await }); - let report_config = build_report_config(args)?; - let report = ReportTask::new(report_config); - let report_task = spawn(async move { report.local_run().await }); + let crash_report_input_monitor = DirectoryMonitorQueue::start_monitoring(crash_dir).await?; + let report_config = build_report_config( + args, + Some(crash_report_input_monitor.queue_client), + CommonConfig { + task_id: Uuid::new_v4(), + ..common + }, + )?; + let report_task = spawn(async move { ReportTask::new(report_config).managed_run().await }); - let result = tokio::try_join!(fuzz_task, report_task)?; - result.0?; - result.1?; + try_wait_all_join_handles(vec![ + fuzz_task, + report_task, + crash_report_input_monitor.handle, + ]) + .await?; Ok(()) } diff --git a/src/agent/onefuzz-agent/src/main.rs b/src/agent/onefuzz-agent/src/main.rs index aff91f680..dfce3c327 100644 --- a/src/agent/onefuzz-agent/src/main.rs +++ b/src/agent/onefuzz-agent/src/main.rs @@ -13,14 +13,12 @@ use anyhow::Result; use clap::{App, ArgMatches, SubCommand}; use std::io::{stdout, Write}; -mod debug; mod local; mod managed; mod tasks; const LICENSE_CMD: &str = "licenses"; const LOCAL_CMD: &str = "local"; -const DEBUG_CMD: &str = "debug"; const MANAGED_CMD: &str = "managed"; fn main() -> Result<()> { @@ -37,7 +35,6 @@ fn main() -> Result<()> { .version(built_version.as_str()) .subcommand(managed::cmd::args(MANAGED_CMD)) .subcommand(local::cmd::args(LOCAL_CMD)) - .subcommand(debug::cmd::args(DEBUG_CMD)) .subcommand(SubCommand::with_name(LICENSE_CMD).about("display third-party licenses")); let matches = app.get_matches(); @@ -49,7 +46,6 @@ fn main() -> Result<()> { async fn run(args: ArgMatches<'_>) -> Result<()> { match args.subcommand() { (LICENSE_CMD, Some(_)) => licenses(), - (DEBUG_CMD, Some(sub)) => debug::cmd::run(sub).await, (LOCAL_CMD, Some(sub)) => local::cmd::run(sub).await, (MANAGED_CMD, Some(sub)) => managed::cmd::run(sub).await, _ => { diff --git a/src/agent/onefuzz-agent/src/tasks/analysis/generic.rs b/src/agent/onefuzz-agent/src/tasks/analysis/generic.rs index 2136d8acb..77047237b 100644 --- a/src/agent/onefuzz-agent/src/tasks/analysis/generic.rs +++ b/src/agent/onefuzz-agent/src/tasks/analysis/generic.rs @@ -6,12 +6,14 @@ use crate::tasks::{ }; use anyhow::{Context, Result}; use futures::stream::StreamExt; -use onefuzz::{az_copy, blob::url::BlobUrl}; +use onefuzz::{az_copy, blob::url::BlobUrl, fs::SyncPath}; use onefuzz::{ - expand::Expand, fs::set_executable, fs::OwnedDir, jitter::delay_with_jitter, - process::monitor_process, 
syncdir::SyncedDir, }; -use reqwest::Url; use serde::Deserialize; use std::process::Stdio; use std::{ @@ -31,7 +33,7 @@ pub struct Config { pub target_exe: PathBuf, pub target_options: Vec<String>, - pub input_queue: Option<Url>, + pub input_queue: Option<QueueClient>, pub crashes: Option<SyncedDir>, pub analysis: SyncedDir, @@ -45,7 +47,7 @@ pub struct Config { pub common: CommonConfig, } -pub async fn spawn(config: Config) -> Result<()> { +pub async fn run(config: Config) -> Result<()> { let tmp_dir = PathBuf::from(format!("./{}/tmp", config.common.task_id)); let tmp = OwnedDir::new(tmp_dir); tmp.reset().await?; @@ -120,9 +122,8 @@ async fn run_existing(config: &Config, reports_dir: &Option<PathBuf>) -> Result< async fn already_checked(config: &Config, input: &BlobUrl) -> Result<bool> { let result = if let Some(crashes) = &config.crashes { - let url = crashes.try_url()?; - url.account() == input.account() - && url.container() == input.container() + crashes.url.account() == input.account() + && crashes.url.container() == input.container() && crashes.path.join(input.name()).exists() } else { false }; @@ -137,13 +138,13 @@ async fn poll_inputs( reports_dir: &Option<PathBuf>, ) -> Result<()> { let heartbeat = config.common.init_heartbeat().await?; - if let Some(queue) = &config.input_queue { - let mut input_queue = QueueClient::new(queue.clone()); - + if let Some(input_queue) = &config.input_queue { loop { heartbeat.alive(); if let Some(message) = input_queue.pop().await? { - let input_url = match BlobUrl::parse(str::from_utf8(message.data())?) { + let input_url = message.parse(|data| BlobUrl::parse(str::from_utf8(data)?)); + + let input_url = match input_url { Ok(url) => url, Err(err) => { error!("could not parse input URL from queue message: {}", err); @@ -152,15 +153,12 @@ async fn poll_inputs( }; if !already_checked(&config, &input_url).await? { - let file_name = input_url.name(); - let mut destination_path = PathBuf::from(tmp_dir.path()); - destination_path.push(file_name); - az_copy::copy(input_url.url().as_ref(), &destination_path, false).await?; + let destination_path = _copy(input_url, &tmp_dir).await?; run_tool(destination_path, &config, &reports_dir).await?; config.analysis.sync_push().await? } - input_queue.delete(message).await?; + message.delete().await?; } else { warn!("no new candidate inputs found, sleeping"); delay_with_jitter(EMPTY_QUEUE_DELAY).await; @@ -171,6 +169,26 @@ async fn poll_inputs( Ok(()) } +async fn _copy(input_url: BlobUrl, destination_folder: &OwnedDir) -> Result<PathBuf> { + let file_name = input_url.name(); + let mut destination_path = PathBuf::from(destination_folder.path()); + destination_path.push(file_name); + match input_url { + BlobUrl::AzureBlob(input_url) => { + az_copy::copy(input_url.as_ref(), destination_path.clone(), false).await? + } + BlobUrl::LocalFile(path) => { + copy( + SyncPath::file(path), + SyncPath::dir(destination_path.clone()), + false, + ) + .await? 
+ } + } + Ok(destination_path) +} + pub async fn run_tool( input: impl AsRef<Path>, config: &Config, @@ -197,13 +215,13 @@ pub async fn run_tool( tester.reports_dir(&reports_dir) }) .set_optional_ref(&config.crashes, |tester, crashes| { - if let Some(url) = &crashes.url { - tester - .crashes_account(&url.account()) - .crashes_container(&url.container()) - } else { - tester - } + tester + .set_optional_ref(&crashes.url.account(), |tester, account| { + tester.crashes_account(account) + }) + .set_optional_ref(&crashes.url.container(), |tester, container| { + tester.crashes_container(container) + }) }); let analyzer_path = expand.evaluate_value(&config.analyzer_exe)?; diff --git a/src/agent/onefuzz-agent/src/tasks/config.rs b/src/agent/onefuzz-agent/src/tasks/config.rs index 730f21c5a..c70d34a07 100644 --- a/src/agent/onefuzz-agent/src/tasks/config.rs +++ b/src/agent/onefuzz-agent/src/tasks/config.rs @@ -184,7 +184,7 @@ impl Config { .await } Config::LibFuzzerMerge(config) => merge::libfuzzer_merge::spawn(Arc::new(config)).await, - Config::GenericAnalysis(config) => analysis::generic::spawn(config).await, + Config::GenericAnalysis(config) => analysis::generic::run(config).await, Config::GenericGenerator(config) => { fuzz::generator::GeneratorTask::new(config).run().await } diff --git a/src/agent/onefuzz-agent/src/tasks/coverage/libfuzzer_coverage.rs b/src/agent/onefuzz-agent/src/tasks/coverage/libfuzzer_coverage.rs index ebc0cb011..011142504 100644 --- a/src/agent/onefuzz-agent/src/tasks/coverage/libfuzzer_coverage.rs +++ b/src/agent/onefuzz-agent/src/tasks/coverage/libfuzzer_coverage.rs @@ -49,7 +49,7 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use storage_queue::Message; +use storage_queue::{Message, QueueClient}; use tokio::fs; const TOTAL_COVERAGE: &str = "total.cov"; @@ -59,7 +59,7 @@ pub struct Config { pub target_exe: PathBuf, pub target_env: HashMap<String, String>, pub target_options: Vec<String>, - pub input_queue: Option<Url>, + pub input_queue: Option<QueueClient>, pub readonly_inputs: Vec<SyncedDir>, pub coverage: SyncedDir, @@ -93,20 +93,6 @@ impl CoverageTask { Self { config, poller } } - pub async fn local_run(&self) -> Result<()> { - let mut processor = CoverageProcessor::new(self.config.clone()).await?; - - self.config.coverage.init().await?; - for synced_dir in &self.config.readonly_inputs { - synced_dir.init().await?; - self.record_corpus_coverage(&mut processor, &synced_dir) - .await?; - } - processor.report_total().await?; - - Ok(()) - } - async fn check_libfuzzer(&self) -> Result<()> { if self.config.check_fuzzer_help { let fuzzer = LibFuzzer::new( @@ -160,7 +146,7 @@ impl CoverageTask { // If a queue has been provided, poll it for new coverage. 
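// With the stand-alone `local_run` removed above, local mode exercises this
// same path: `input_queue` is now a `QueueClient` (for local runs, the
// in-memory channel variant fed by `DirectoryMonitorQueue`), and
// `CallbackImpl::new` is fallible, hence the `?` added below.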
if let Some(queue) = &self.config.input_queue { info!("polling queue for new coverage"); - let callback = CallbackImpl::new(queue.clone(), processor); + let callback = CallbackImpl::new(queue.clone(), processor)?; self.poller.run(callback).await?; } diff --git a/src/agent/onefuzz-agent/src/tasks/fuzz/generator.rs b/src/agent/onefuzz-agent/src/tasks/fuzz/generator.rs index 3243c50ba..9f6dc94a6 100644 --- a/src/agent/onefuzz-agent/src/tasks/fuzz/generator.rs +++ b/src/agent/onefuzz-agent/src/tasks/fuzz/generator.rs @@ -64,10 +64,8 @@ impl GeneratorTask { pub async fn run(&self) -> Result<()> { self.config.crashes.init().await?; if let Some(tools) = &self.config.tools { - if tools.url.is_some() { - tools.init_pull().await?; - set_executable(&tools.path).await?; - } + tools.init_pull().await?; + set_executable(&tools.path).await?; } let hb_client = self.config.common.init_heartbeat().await?; @@ -207,17 +205,18 @@ mod tests { async fn test_radamsa_linux() -> anyhow::Result<()> { use super::{Config, GeneratorTask}; use crate::tasks::config::CommonConfig; + use onefuzz::blob::BlobContainerUrl; use onefuzz::syncdir::SyncedDir; + use reqwest::Url; use std::collections::HashMap; use std::env; - use std::path::Path; use tempfile::tempdir; let crashes_temp = tempfile::tempdir()?; - let crashes = crashes_temp.path(); + let crashes: &std::path::Path = crashes_temp.path(); - let inputs_temp = tempfile::tempdir().unwrap(); - let inputs = inputs_temp.path(); + let inputs_temp = tempfile::tempdir()?; + let inputs: &std::path::Path = inputs_temp.path(); let input_file = inputs.join("seed.txt"); tokio::fs::write(input_file, "test").await?; @@ -234,23 +233,26 @@ mod tests { .collect(); let radamsa_path = env::var("ONEFUZZ_TEST_RADAMSA_LINUX")?; - let radamsa_as_path = Path::new(&radamsa_path); + let radamsa_as_path = std::path::Path::new(&radamsa_path); let radamsa_dir = radamsa_as_path.parent().unwrap(); + let readonly_inputs_local = tempfile::tempdir().unwrap().path().into(); + let crashes_local = tempfile::tempdir().unwrap().path().into(); + let tools_local = tempfile::tempdir().unwrap().path().into(); let config = Config { generator_exe: String::from("{tools_dir}/radamsa"), generator_options, readonly_inputs: vec![SyncedDir { - path: inputs.to_path_buf(), - url: None, + path: readonly_inputs_local, + url: BlobContainerUrl::parse(Url::from_directory_path(inputs).unwrap())?, }], crashes: SyncedDir { - path: crashes.to_path_buf(), - url: None, + path: crashes_local, + url: BlobContainerUrl::parse(Url::from_directory_path(crashes).unwrap())?, }, tools: Some(SyncedDir { - path: radamsa_dir.to_path_buf(), - url: None, + path: tools_local, + url: BlobContainerUrl::parse(Url::from_directory_path(radamsa_dir).unwrap())?, }), target_exe: Default::default(), target_env: Default::default(), diff --git a/src/agent/onefuzz-agent/src/tasks/fuzz/libfuzzer_fuzz.rs b/src/agent/onefuzz-agent/src/tasks/fuzz/libfuzzer_fuzz.rs index 7a177e8be..f655a2f67 100644 --- a/src/agent/onefuzz-agent/src/tasks/fuzz/libfuzzer_fuzz.rs +++ b/src/agent/onefuzz-agent/src/tasks/fuzz/libfuzzer_fuzz.rs @@ -17,9 +17,8 @@ use onefuzz_telemetry::{ }; use serde::Deserialize; use std::{collections::HashMap, path::PathBuf}; -use tempfile::tempdir; +use tempfile::{tempdir_in, TempDir}; use tokio::{ - fs::rename, io::{AsyncBufReadExt, BufReader}, sync::mpsc, task, @@ -126,6 +125,20 @@ impl LibFuzzerFuzzTask { Ok(()) } + /// Creates a temporary directory in the current task directory + async fn create_local_temp_dir(&self) -> Result { + let task_dir = 
self + .config + .inputs + .path + .parent() + .ok_or_else(|| anyhow!("Invalid input path"))?; + let temp_path = task_dir.join(".temp"); + tokio::fs::create_dir_all(&temp_path).await?; + let temp_dir = tempdir_in(temp_path)?; + Ok(temp_dir) + } + // The fuzzer monitor coordinates a _series_ of fuzzer runs. // // A run is one session of continuous fuzzing, terminated by a fuzzing error @@ -135,7 +148,7 @@ impl LibFuzzerFuzzTask { worker_id: u64, stats_sender: Option<&StatsSender>, ) -> Result<()> { - let local_input_dir = tempdir()?; + let local_input_dir = self.create_local_temp_dir().await?; loop { self.run_fuzzer(&local_input_dir.path(), worker_id, stats_sender) .await?; @@ -165,7 +178,7 @@ impl LibFuzzerFuzzTask { worker_id: u64, stats_sender: Option<&StatsSender>, ) -> Result<()> { - let crash_dir = tempdir()?; + let crash_dir = self.create_local_temp_dir().await?; let run_id = Uuid::new_v4(); debug!("starting fuzzer run, run_id = {}", run_id); @@ -235,7 +248,7 @@ impl LibFuzzerFuzzTask { for file in &files { if let Some(filename) = file.file_name() { let dest = self.config.crashes.path.join(filename); - rename(file, dest).await?; + tokio::fs::rename(file, dest).await?; } } diff --git a/src/agent/onefuzz-agent/src/tasks/fuzz/supervisor.rs b/src/agent/onefuzz-agent/src/tasks/fuzz/supervisor.rs index 549d453ad..a597602bf 100644 --- a/src/agent/onefuzz-agent/src/tasks/fuzz/supervisor.rs +++ b/src/agent/onefuzz-agent/src/tasks/fuzz/supervisor.rs @@ -31,7 +31,7 @@ use tokio::{ sync::Notify, }; -#[derive(Debug, Deserialize, Default)] +#[derive(Debug, Deserialize)] pub struct SupervisorConfig { pub inputs: SyncedDir, pub crashes: SyncedDir, @@ -199,10 +199,11 @@ async fn start_supervisor( .set_optional_ref(&config.common.instance_telemetry_key, |tester, key| { tester.instance_telemetry_key(&key) }) - .set_optional_ref(&config.crashes.url, |tester, url| { - tester - .crashes_account(&url.account()) - .crashes_container(&url.container()) + .set_optional_ref(&config.crashes.url.account(), |tester, account| { + tester.crashes_account(account) + }) + .set_optional_ref(&config.crashes.url.container(), |tester, container| { + tester.crashes_container(container) }); let supervisor_path = expand.evaluate_value(&config.supervisor_exe)?; @@ -255,6 +256,8 @@ mod tests { #[cfg(target_os = "linux")] #[cfg_attr(not(feature = "integration_test"), ignore)] async fn test_fuzzer_linux() { + use onefuzz::blob::BlobContainerUrl; + use reqwest::Url; use std::env; let runtime_dir = tempfile::tempdir().unwrap(); @@ -277,15 +280,19 @@ mod tests { let reports_dir = reports_dir_temp.path().into(); let fault_dir_temp = tempfile::tempdir().unwrap(); + let crashes_local = tempfile::tempdir().unwrap().path().into(); + let corpus_dir_local = tempfile::tempdir().unwrap().path().into(); let crashes = SyncedDir { - path: fault_dir_temp.path().into(), - url: None, + path: crashes_local, + url: BlobContainerUrl::parse(Url::from_directory_path(fault_dir_temp).unwrap()) + .unwrap(), }; let corpus_dir_temp = tempfile::tempdir().unwrap(); let corpus_dir = SyncedDir { - path: corpus_dir_temp.path().into(), - url: None, + path: corpus_dir_local, + url: BlobContainerUrl::parse(Url::from_directory_path(corpus_dir_temp).unwrap()) + .unwrap(), }; let seed_file_name = corpus_dir.path.join("seed.txt"); tokio::fs::write(seed_file_name, "xyz").await.unwrap(); @@ -316,7 +323,17 @@ mod tests { supervisor_input_marker, target_exe, target_options, - ..Default::default() + inputs: corpus_dir.clone(), + crashes: crashes.clone(), + tools: None, + 
wait_for_files: None, + stats_file: None, + stats_format: None, + ensemble_sync_delay: None, + reports: None, + unique_reports: None, + no_repro: None, + common: CommonConfig::default(), }; let process = start_supervisor(runtime_dir, &config, &crashes, &corpus_dir, reports_dir) diff --git a/src/agent/onefuzz-agent/src/tasks/generic/input_poller.rs b/src/agent/onefuzz-agent/src/tasks/generic/input_poller.rs index bfbd4edb4..4900a441c 100644 --- a/src/agent/onefuzz-agent/src/tasks/generic/input_poller.rs +++ b/src/agent/onefuzz-agent/src/tasks/generic/input_poller.rs @@ -121,9 +121,7 @@ impl InputPoller { to_process: &SyncedDir, ) -> Result<()> { self.batch_dir = Some(to_process.clone()); - if to_process.url.is_some() { - to_process.init_pull().await?; - } + to_process.init_pull().await?; info!("batch processing directory: {}", to_process.path.display()); let mut read_dir = fs::read_dir(&to_process.path).await?; diff --git a/src/agent/onefuzz-agent/src/tasks/generic/input_poller/callback.rs b/src/agent/onefuzz-agent/src/tasks/generic/input_poller/callback.rs index 8148e0d97..5f5ada48a 100644 --- a/src/agent/onefuzz-agent/src/tasks/generic/input_poller/callback.rs +++ b/src/agent/onefuzz-agent/src/tasks/generic/input_poller/callback.rs @@ -6,26 +6,27 @@ use std::path::{Path, PathBuf}; use anyhow::Result; use async_trait::async_trait; use reqwest::Url; -use storage_queue::{Message, QueueClient}; +use storage_queue::Message; +use storage_queue::QueueClient; #[async_trait] -pub trait Queue { +pub trait Queue: Send { async fn pop(&mut self) -> Result>; async fn delete(&mut self, msg: M) -> Result<()>; } -pub trait Parser { +pub trait Parser: Send { fn parse(&mut self, msg: &M) -> Result; } #[async_trait] -pub trait Downloader { +pub trait Downloader: Send { async fn download(&mut self, url: Url, dir: &Path) -> Result; } #[async_trait] -pub trait Processor { +pub trait Processor: Send { async fn process(&mut self, url: Option, input: &Path) -> Result<()>; } @@ -72,9 +73,8 @@ impl

<P> CallbackImpl<P>

where P: Processor + Send, { - pub fn new(queue_url: Url, processor: P) -> Self { - let queue = QueueClient::new(queue_url); - Self { queue, processor } + pub fn new(queue: QueueClient, processor: P) -> Result { + Ok(Self { queue, processor }) } } @@ -88,7 +88,7 @@ where } async fn delete(&mut self, msg: Message) -> Result<()> { - self.queue.delete(msg).await + msg.delete().await } } @@ -97,9 +97,10 @@ where P: Processor + Send, { fn parse(&mut self, msg: &Message) -> Result { - let text = std::str::from_utf8(msg.data())?; - let url = Url::parse(text)?; - + let url = msg.parse(|data| { + let data = std::str::from_utf8(data)?; + Ok(Url::parse(data)?) + })?; Ok(url) } } diff --git a/src/agent/onefuzz-agent/src/tasks/heartbeat.rs b/src/agent/onefuzz-agent/src/tasks/heartbeat.rs index 4d57021b6..0787bdb1a 100644 --- a/src/agent/onefuzz-agent/src/tasks/heartbeat.rs +++ b/src/agent/onefuzz-agent/src/tasks/heartbeat.rs @@ -68,7 +68,7 @@ pub async fn init_task_heartbeat( }) .await; }, - ); + )?; Ok(hb) } diff --git a/src/agent/onefuzz-agent/src/tasks/merge/generic.rs b/src/agent/onefuzz-agent/src/tasks/merge/generic.rs index c53353b92..c046c7f02 100644 --- a/src/agent/onefuzz-agent/src/tasks/merge/generic.rs +++ b/src/agent/onefuzz-agent/src/tasks/merge/generic.rs @@ -56,9 +56,10 @@ pub async fn spawn(config: Arc) -> Result<()> { debug!("tmp dir reset"); utils::reset_tmp_dir(&tmp_dir).await?; config.unique_inputs.sync_pull().await?; - let mut queue = QueueClient::new(config.input_queue.clone()); + let queue = QueueClient::new(config.input_queue.clone())?; if let Some(msg) = queue.pop().await? { - let input_url = match utils::parse_url_data(msg.data()) { + let input_url = msg.parse(utils::parse_url_data); + let input_url = match input_url { Ok(url) => url, Err(err) => { error!("could not parse input URL from queue message: {}", err); @@ -74,7 +75,7 @@ pub async fn spawn(config: Arc) -> Result<()> { } else { debug!("will delete popped message with id = {}", msg.id()); - queue.delete(msg).await?; + msg.delete().await?; debug!( "Attempting to delete {} from the candidate container", @@ -88,7 +89,7 @@ pub async fn spawn(config: Arc) -> Result<()> { } else { warn!("no new candidate inputs found, sleeping"); delay_with_jitter(EMPTY_QUEUE_DELAY).await; - } + }; } } diff --git a/src/agent/onefuzz-agent/src/tasks/merge/libfuzzer_merge.rs b/src/agent/onefuzz-agent/src/tasks/merge/libfuzzer_merge.rs index fd7dd4df8..c6b2e0eee 100644 --- a/src/agent/onefuzz-agent/src/tasks/merge/libfuzzer_merge.rs +++ b/src/agent/onefuzz-agent/src/tasks/merge/libfuzzer_merge.rs @@ -34,7 +34,7 @@ pub struct Config { pub target_exe: PathBuf, pub target_env: HashMap, pub target_options: Vec, - pub input_queue: Option, + pub input_queue: Option, pub inputs: Vec, pub unique_inputs: SyncedDir, pub preserve_existing_outputs: bool, @@ -58,10 +58,9 @@ pub async fn spawn(config: Arc) -> Result<()> { } config.unique_inputs.init().await?; - if let Some(url) = config.input_queue.clone() { + if let Some(queue) = config.input_queue.clone() { loop { - let queue = QueueClient::new(url.clone()); - if let Err(error) = process_message(config.clone(), queue).await { + if let Err(error) = process_message(config.clone(), queue.clone()).await { error!( "failed to process latest message from notification queue: {}", error @@ -85,7 +84,7 @@ pub async fn spawn(config: Arc) -> Result<()> { } } -async fn process_message(config: Arc, mut input_queue: QueueClient) -> Result<()> { +async fn process_message(config: Arc, input_queue: QueueClient) -> Result<()> 
{ let hb_client = config.common.init_heartbeat().await?; hb_client.alive(); let tmp_dir = "./tmp"; @@ -93,7 +92,11 @@ async fn process_message(config: Arc, mut input_queue: QueueClient) -> R utils::reset_tmp_dir(tmp_dir).await?; if let Some(msg) = input_queue.pop().await? { - let input_url = match utils::parse_url_data(msg.data()) { + let input_url = msg.parse(|data| { + let data = std::str::from_utf8(data)?; + Ok(Url::parse(data)?) + }); + let input_url: Url = match input_url { Ok(url) => url, Err(err) => { error!("could not parse input URL from queue message: {}", err); @@ -107,7 +110,7 @@ async fn process_message(config: Arc, mut input_queue: QueueClient) -> R debug!("will delete popped message with id = {}", msg.id()); - input_queue.delete(msg).await?; + msg.delete().await?; debug!( "Attempting to delete {} from the candidate container", diff --git a/src/agent/onefuzz-agent/src/tasks/report/crash_report.rs b/src/agent/onefuzz-agent/src/tasks/report/crash_report.rs index d64f5a3ca..7d6cced29 100644 --- a/src/agent/onefuzz-agent/src/tasks/report/crash_report.rs +++ b/src/agent/onefuzz-agent/src/tasks/report/crash_report.rs @@ -3,22 +3,14 @@ use anyhow::{Context, Result}; use futures::StreamExt; -use onefuzz::{ - asan::AsanLog, - blob::{BlobClient, BlobUrl}, - fs::exists, - monitor::DirectoryMonitor, - syncdir::SyncedDir, -}; +use onefuzz::{asan::AsanLog, blob::BlobUrl, monitor::DirectoryMonitor, syncdir::SyncedDir}; use onefuzz_telemetry::{ Event::{new_report, new_unable_to_reproduce, new_unique_report}, EventData, }; -use reqwest::{StatusCode, Url}; -use reqwest_retry::SendRetry; + use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use tokio::fs; use uuid::Uuid; #[derive(Debug, Deserialize, Serialize)] @@ -66,41 +58,12 @@ pub enum CrashTestResult { NoRepro(NoCrash), } -// Conditionally upload a report, if it would not be a duplicate. -async fn upload(report: &T, url: Url) -> Result { - let blob = BlobClient::new(); - let result = blob - .put(url) - .json(report) - // Conditional PUT, only if-not-exists. - // https://docs.microsoft.com/en-us/rest/api/storageservices/specifying-conditional-headers-for-blob-service-operations - .header("If-None-Match", "*") - .send_retry_default() - .await?; - Ok(result.status() == StatusCode::CREATED) -} - async fn upload_or_save_local( report: &T, dest_name: &str, container: &SyncedDir, ) -> Result { - match &container.url { - Some(blob_url) => { - let url = blob_url.blob(dest_name).url(); - upload(report, url).await - } - None => { - let path = container.path.join(dest_name); - if !exists(&path).await? 
{ - let data = serde_json::to_vec(&report)?; - fs::write(path, data).await?; - Ok(true) - } else { - Ok(false) - } - } - } + container.upload(dest_name, report).await } impl CrashTestResult { @@ -143,8 +106,8 @@ impl CrashTestResult { #[derive(Debug, Deserialize, Serialize)] pub struct InputBlob { - pub account: String, - pub container: String, + pub account: Option, + pub container: Option, pub name: String, } diff --git a/src/agent/onefuzz-agent/src/tasks/report/generic.rs b/src/agent/onefuzz-agent/src/tasks/report/generic.rs index 92dcedcb1..41b8d0c66 100644 --- a/src/agent/onefuzz-agent/src/tasks/report/generic.rs +++ b/src/agent/onefuzz-agent/src/tasks/report/generic.rs @@ -10,17 +10,14 @@ use crate::tasks::{ }; use anyhow::Result; use async_trait::async_trait; -use futures::stream::StreamExt; -use onefuzz::{ - blob::BlobUrl, input_tester::Tester, monitor::DirectoryMonitor, sha256, syncdir::SyncedDir, -}; +use onefuzz::{blob::BlobUrl, input_tester::Tester, sha256, syncdir::SyncedDir}; use reqwest::Url; use serde::Deserialize; use std::{ collections::HashMap, path::{Path, PathBuf}, }; -use storage_queue::Message; +use storage_queue::{Message, QueueClient}; #[derive(Debug, Deserialize)] pub struct Config { @@ -32,7 +29,7 @@ pub struct Config { #[serde(default)] pub target_env: HashMap, - pub input_queue: Option, + pub input_queue: Option, pub crashes: Option, pub reports: Option, pub unique_reports: Option, @@ -65,30 +62,6 @@ impl ReportTask { Self { config, poller } } - pub async fn local_run(&self) -> Result<()> { - let mut processor = GenericReportProcessor::new(&self.config, None); - - info!("Starting generic crash report task"); - let crashes = match &self.config.crashes { - Some(x) => x, - None => bail!("missing crashes directory"), - }; - - let mut read_dir = tokio::fs::read_dir(&crashes.path).await?; - while let Some(crash) = read_dir.next().await { - processor.process(None, &crash?.path()).await?; - } - - if self.config.check_queue { - let mut monitor = DirectoryMonitor::new(&crashes.path); - monitor.start()?; - while let Some(crash) = monitor.next().await { - processor.process(None, &crash).await?; - } - } - Ok(()) - } - pub async fn managed_run(&mut self) -> Result<()> { info!("Starting generic crash report task"); let heartbeat_client = self.config.common.init_heartbeat().await?; @@ -102,7 +75,7 @@ impl ReportTask { info!("processing crashes from queue"); if self.config.check_queue { if let Some(queue) = &self.config.input_queue { - let callback = CallbackImpl::new(queue.clone(), processor); + let callback = CallbackImpl::new(queue.clone(), processor)?; self.poller.run(callback).await?; } } diff --git a/src/agent/onefuzz-agent/src/tasks/report/libfuzzer_report.rs b/src/agent/onefuzz-agent/src/tasks/report/libfuzzer_report.rs index 732e09a3f..e1e3706e3 100644 --- a/src/agent/onefuzz-agent/src/tasks/report/libfuzzer_report.rs +++ b/src/agent/onefuzz-agent/src/tasks/report/libfuzzer_report.rs @@ -7,10 +7,7 @@ use crate::tasks::{ }; use anyhow::{Context, Result}; use async_trait::async_trait; -use futures::stream::StreamExt; -use onefuzz::{ - blob::BlobUrl, libfuzzer::LibFuzzer, monitor::DirectoryMonitor, sha256, syncdir::SyncedDir, -}; +use onefuzz::{blob::BlobUrl, libfuzzer::LibFuzzer, sha256, syncdir::SyncedDir}; use reqwest::Url; use serde::Deserialize; use std::{ @@ -18,7 +15,7 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use storage_queue::Message; +use storage_queue::{Message, QueueClient}; #[derive(Debug, Deserialize)] pub struct Config { @@ -27,7 +24,7 @@ pub 
struct Config { // TODO: options are not yet used for crash reporting pub target_options: Vec, pub target_timeout: Option, - pub input_queue: Option, + pub input_queue: Option, pub crashes: Option, pub reports: Option, pub unique_reports: Option, @@ -59,13 +56,8 @@ impl ReportTask { Self { config, poller } } - pub async fn local_run(&self) -> Result<()> { - let mut processor = AsanProcessor::new(self.config.clone()).await?; - let crashes = match &self.config.crashes { - Some(x) => x, - None => bail!("missing crashes directory"), - }; - crashes.init().await?; + pub async fn managed_run(&mut self) -> Result<()> { + info!("Starting libFuzzer crash report task"); if let Some(unique_reports) = &self.config.unique_reports { unique_reports.init().await?; @@ -77,30 +69,6 @@ impl ReportTask { no_repro.init().await?; } - let mut read_dir = tokio::fs::read_dir(&crashes.path).await.with_context(|| { - format_err!( - "unable to read crashes directory {}", - crashes.path.display() - ) - })?; - - while let Some(crash) = read_dir.next().await { - processor.process(None, &crash?.path()).await?; - } - - if self.config.check_queue { - let mut monitor = DirectoryMonitor::new(crashes.path.clone()); - monitor.start()?; - while let Some(crash) = monitor.next().await { - processor.process(None, &crash).await?; - } - } - - Ok(()) - } - - pub async fn managed_run(&mut self) -> Result<()> { - info!("Starting libFuzzer crash report task"); let mut processor = AsanProcessor::new(self.config.clone()).await?; if let Some(crashes) = &self.config.crashes { @@ -108,8 +76,8 @@ impl ReportTask { } if self.config.check_queue { - if let Some(queue) = &self.config.input_queue { - let callback = CallbackImpl::new(queue.clone(), processor); + if let Some(url) = &self.config.input_queue { + let callback = CallbackImpl::new(url.clone(), processor)?; self.poller.run(callback).await?; } } diff --git a/src/agent/onefuzz-agent/src/tasks/utils.rs b/src/agent/onefuzz-agent/src/tasks/utils.rs index 4ed1fb346..24fe49afc 100644 --- a/src/agent/onefuzz-agent/src/tasks/utils.rs +++ b/src/agent/onefuzz-agent/src/tasks/utils.rs @@ -14,26 +14,34 @@ pub async fn download_input(input_url: Url, dst: impl AsRef) -> Result) -> Result<()> { diff --git a/src/agent/onefuzz-supervisor/src/agent.rs b/src/agent/onefuzz-supervisor/src/agent.rs index 7de40c43f..80b24b8f8 100644 --- a/src/agent/onefuzz-supervisor/src/agent.rs +++ b/src/agent/onefuzz-supervisor/src/agent.rs @@ -130,26 +130,27 @@ impl Agent { if can_schedule.allowed { info!("claiming work set: {:?}", msg.work_set); - let claim = self.work_queue.claim(msg.receipt).await; + match self.work_queue.claim(msg).await { + Err(err) => { + error!("unable to claim work set: {}", err); - if let Err(err) = claim { - error!("unable to claim work set: {}", err); + // We were unable to claim the work set, so it will reappear in the pool's + // work queue when the visibility timeout expires. Don't execute the work, + // or else another node will pick it up, and it will be double-scheduled. + // + // Stay in the `Free` state. + state.into() + } + Ok(work_set) => { + info!("claimed work set: {:?}", work_set); - // We were unable to claim the work set, so it will reappear in the pool's - // work queue when the visibility timeout expires. Don't execute the work, - // or else another node will pick it up, and it will be double-scheduled. - // - // Stay in the `Free` state. 
- state.into() - } else { - info!("claimed work set: {:?}", msg.work_set); - - // We are allowed to schedule this work, and we have claimed it, so no other - // node will see it. - // - // Transition to `SettingUp` state. - let state = state.schedule(msg.work_set.clone()); - state.into() + // We are allowed to schedule this work, and we have claimed it, so no other + // node will see it. + // + // Transition to `SettingUp` state. + let state = state.schedule(work_set); + state.into() + } } } else { // We cannot schedule the work set. Depending on why, we want to either drop the work @@ -160,10 +161,13 @@ impl Agent { // If `work_stopped`, the work set is not valid for any node, and we should drop it for the // entire pool by claiming but not executing it. if can_schedule.work_stopped { - if let Err(err) = self.work_queue.claim(msg.receipt).await { - error!("unable to drop stopped work: {}", err); - } else { - info!("dropped stopped work set: {:?}", msg.work_set); + match self.work_queue.claim(msg).await { + Err(err) => { + error!("unable to drop stopped work: {}", err); + } + Ok(work_set) => { + info!("dropped stopped work set: {:?}", work_set); + } } } else { // Otherwise, the work was not stopped, but we still should not execute it. This is likely diff --git a/src/agent/onefuzz-supervisor/src/agent/tests.rs b/src/agent/onefuzz-supervisor/src/agent/tests.rs index d5822c5e8..50cd0ae18 100644 --- a/src/agent/onefuzz-supervisor/src/agent/tests.rs +++ b/src/agent/onefuzz-supervisor/src/agent/tests.rs @@ -47,19 +47,12 @@ impl Fixture { } pub fn message(&self) -> Message { - let receipt = self.receipt(); let work_set = self.work_set(); - Message { receipt, work_set } - } - - pub fn receipt(&self) -> Receipt { - let message_id = "6a0bc779-a1a8-4112-93cd-eb0d77529aa3".parse().unwrap(); - - Receipt(storage_queue::Receipt { - message_id, - pop_receipt: "abc".into(), - }) + Message { + work_set, + queue_message: None, + } } pub fn work_set(&self) -> WorkSet { @@ -97,7 +90,12 @@ async fn test_update_free_no_work() { assert!(matches!(agent.scheduler().unwrap(), Scheduler::Free(..))); let double: &WorkQueueDouble = agent.work_queue.downcast_ref().unwrap(); - assert_eq!(double.claimed, &[]); + let claimed_worksets = double + .claimed + .iter() + .map(|cl| cl.work_set.clone()) + .collect::>(); + assert_eq!(claimed_worksets, &[]); } #[tokio::test] @@ -119,7 +117,12 @@ async fn test_update_free_has_work() { )); let double: &WorkQueueDouble = agent.work_queue.downcast_ref().unwrap(); - assert_eq!(double.claimed, &[Fixture.receipt()]); + let claimed_worksets = double + .claimed + .iter() + .map(|cl| cl.work_set.clone()) + .collect::>(); + assert_eq!(claimed_worksets, &[Fixture.work_set()]); } #[tokio::test] diff --git a/src/agent/onefuzz-supervisor/src/commands.rs b/src/agent/onefuzz-supervisor/src/commands.rs index 1e37907b7..113f68d8a 100644 --- a/src/agent/onefuzz-supervisor/src/commands.rs +++ b/src/agent/onefuzz-supervisor/src/commands.rs @@ -1,9 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
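// The `auth` module moves out of onefuzz-supervisor into the shared `onefuzz`
// crate (see the file rename near the end of this diff), so `Secret`,
// `Credentials`, and `AccessToken` are now imported from `onefuzz::auth`
// rather than the supervisor-local `crate::auth`.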
-use crate::auth::Secret; use anyhow::{Context, Result}; -use onefuzz::machine_id::get_scaleset_name; +use onefuzz::{auth::Secret, machine_id::get_scaleset_name}; use std::process::Stdio; use tokio::{fs, io::AsyncWriteExt, process::Command}; diff --git a/src/agent/onefuzz-supervisor/src/config.rs b/src/agent/onefuzz-supervisor/src/config.rs index f991a9e05..c5cbe1d26 100644 --- a/src/agent/onefuzz-supervisor/src/config.rs +++ b/src/agent/onefuzz-supervisor/src/config.rs @@ -3,6 +3,7 @@ use anyhow::{Context, Result}; use onefuzz::{ + auth::{ClientCredentials, Credentials, ManagedIdentityCredentials}, http::{is_auth_error_code, ResponseExt}, jitter::delay_with_jitter, }; @@ -16,8 +17,6 @@ use tokio::fs; use url::Url; use uuid::Uuid; -use crate::auth::{ClientCredentials, Credentials, ManagedIdentityCredentials}; - #[derive(Clone, Debug, Deserialize, Eq, PartialEq)] pub struct StaticConfig { pub credentials: Credentials, diff --git a/src/agent/onefuzz-supervisor/src/coordinator.rs b/src/agent/onefuzz-supervisor/src/coordinator.rs index 867ffec42..44cb5f8df 100644 --- a/src/agent/onefuzz-supervisor/src/coordinator.rs +++ b/src/agent/onefuzz-supervisor/src/coordinator.rs @@ -3,12 +3,11 @@ use anyhow::Result; use downcast_rs::Downcast; -use onefuzz::{http::ResponseExt, process::Output}; +use onefuzz::{auth::AccessToken, http::ResponseExt, process::Output}; use reqwest::{Client, Request, Response, StatusCode}; use serde::Serialize; use uuid::Uuid; -use crate::auth::AccessToken; use crate::commands::SshKeyInfo; use crate::config::Registration; use crate::work::{TaskId, WorkSet}; diff --git a/src/agent/onefuzz-supervisor/src/heartbeat.rs b/src/agent/onefuzz-supervisor/src/heartbeat.rs index 6724c3ceb..46110be37 100644 --- a/src/agent/onefuzz-supervisor/src/heartbeat.rs +++ b/src/agent/onefuzz-supervisor/src/heartbeat.rs @@ -50,7 +50,7 @@ pub async fn init_agent_heartbeat(queue_url: Url) -> Result Result<()> { let scheduler = reboot_context.into(); debug!("loaded scheduler: {}", scheduler); - let work_queue = work::WorkQueue::new(registration.clone()); + let work_queue = work::WorkQueue::new(registration.clone())?; let agent_heartbeat = match config.heartbeat_queue { Some(url) => Some(init_agent_heartbeat(url).await?), diff --git a/src/agent/onefuzz-supervisor/src/work.rs b/src/agent/onefuzz-supervisor/src/work.rs index cacf3b2f0..2599a6f92 100644 --- a/src/agent/onefuzz-supervisor/src/work.rs +++ b/src/agent/onefuzz-supervisor/src/work.rs @@ -6,12 +6,11 @@ use std::path::PathBuf; use anyhow::{Context, Result}; use downcast_rs::Downcast; -use onefuzz::{blob::BlobContainerUrl, http::is_auth_error}; -use storage_queue::QueueClient; +use onefuzz::{auth::Secret, blob::BlobContainerUrl, http::is_auth_error}; +use storage_queue::{Message as QueueMessage, QueueClient}; use tokio::fs; use uuid::Uuid; -use crate::auth::Secret; use crate::config::Registration; pub type JobId = Uuid; @@ -71,7 +70,10 @@ impl WorkSet { } pub fn setup_dir(&self) -> Result { - let setup_dir = self.setup_url.container(); + let setup_dir = self + .setup_url + .account() + .ok_or_else(|| anyhow!("Invalid container Url"))?; Ok(onefuzz::fs::onefuzz_root()? 
.join("blob-containers") .join(setup_dir)) @@ -104,7 +106,7 @@ impl WorkUnit { pub trait IWorkQueue: Downcast { async fn poll(&mut self) -> Result>; - async fn claim(&mut self, receipt: Receipt) -> Result<()>; + async fn claim(&mut self, message: Message) -> Result; } #[async_trait] @@ -113,36 +115,33 @@ impl IWorkQueue for WorkQueue { self.poll().await } - async fn claim(&mut self, receipt: Receipt) -> Result<()> { - self.claim(receipt).await + async fn claim(&mut self, message: Message) -> Result { + self.claim(message).await } } impl_downcast!(IWorkQueue); -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Debug)] pub struct Message { - pub receipt: Receipt, + pub queue_message: Option, pub work_set: WorkSet, } -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Receipt(pub storage_queue::Receipt); - pub struct WorkQueue { queue: QueueClient, registration: Registration, } impl WorkQueue { - pub fn new(registration: Registration) -> Self { + pub fn new(registration: Registration) -> Result { let url = registration.dynamic_config.work_queue.clone(); - let queue = QueueClient::new(url); + let queue = QueueClient::new(url)?; - Self { + Ok(Self { queue, registration, - } + }) } async fn renew(&mut self) -> Result<()> { @@ -151,7 +150,7 @@ impl WorkQueue { .await .context("unable to renew registration in workqueue")?; let url = self.registration.dynamic_config.work_queue.clone(); - self.queue = QueueClient::new(url); + self.queue = QueueClient::new(url)?; Ok(()) } @@ -171,38 +170,44 @@ impl WorkQueue { // Now we've had a chance to ensure our SAS URL is fresh. For any other // error, including another auth error, bail. - let msg = msg.context("unable to check work queue")?; + let msg = msg.context("failed to pop message")?; - let result = match msg { - Some(msg) => { - let work_set = - serde_json::from_slice(msg.data()).context("unable to parse WorkSet")?; - let receipt = Receipt(msg.receipt); - Some(Message { receipt, work_set }) - } - None => None, - }; - Ok(result) - } - - pub async fn claim(&mut self, receipt: Receipt) -> Result<()> { - let receipt = receipt.0; - - let result = self.queue.delete(receipt.clone()).await; - - // If we had an auth err, renew our registration and retry once, in case - // it was just due to a stale SAS URL. 
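// In the rewrite below, "claiming" consumes the whole `Message`: on success
// the backing queue message is deleted and the parsed `WorkSet` is returned;
// on an auth failure the registration is renewed and the delete is retried
// once against a refreshed SAS URL via `update_url`.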
- if let Err(err) = &result { - if is_auth_error(err) { - self.renew().await.context("unable to renew registration")?; - self.queue - .delete(receipt) - .await - .context("unable to claim work from queue")?; - } + if msg.is_none() { + return Ok(None); } - Ok(()) + let queue_message = msg.unwrap(); + let work_set: WorkSet = queue_message.get()?; + let msg = Message { + queue_message: Some(queue_message), + work_set, + }; + + Ok(Some(msg)) + } + + pub async fn claim(&mut self, message: Message) -> Result { + if let Some(queue_message) = message.queue_message { + match queue_message.delete().await { + Err(err) => { + if is_auth_error(&err) { + self.renew().await.context("unable to renew registration")?; + let url = self.registration.dynamic_config.work_queue.clone(); + queue_message + .update_url(url) + .delete() + .await + .context("unable to claim work from queue")?; + Ok(message.work_set) + } else { + bail!("{}", err) + } + } + Ok(_) => Ok(message.work_set), + } + } else { + Ok(message.work_set) + } } } diff --git a/src/agent/onefuzz-supervisor/src/work/double.rs b/src/agent/onefuzz-supervisor/src/work/double.rs index edd7923bb..0ba56c10e 100644 --- a/src/agent/onefuzz-supervisor/src/work/double.rs +++ b/src/agent/onefuzz-supervisor/src/work/double.rs @@ -3,10 +3,10 @@ use super::*; -#[derive(Clone, Debug, Default)] +#[derive(Default)] pub struct WorkQueueDouble { pub available: Vec, - pub claimed: Vec, + pub claimed: Vec, } #[async_trait] @@ -15,8 +15,9 @@ impl IWorkQueue for WorkQueueDouble { Ok(self.available.pop()) } - async fn claim(&mut self, receipt: Receipt) -> Result<()> { - self.claimed.push(receipt); - Ok(()) + async fn claim(&mut self, message: Message) -> Result { + let work_set = message.work_set.clone(); + self.claimed.push(message); + Ok(work_set) } } diff --git a/src/agent/onefuzz-supervisor/src/auth.rs b/src/agent/onefuzz/src/auth.rs similarity index 99% rename from src/agent/onefuzz-supervisor/src/auth.rs rename to src/agent/onefuzz/src/auth.rs index 52ecfcfbf..017e18cf8 100644 --- a/src/agent/onefuzz-supervisor/src/auth.rs +++ b/src/agent/onefuzz/src/auth.rs @@ -3,8 +3,8 @@ use std::fmt; +use crate::http::ResponseExt; use anyhow::Result; -use onefuzz::http::ResponseExt; use reqwest_retry::SendRetry; use url::Url; use uuid::Uuid; diff --git a/src/agent/onefuzz/src/blob/url.rs b/src/agent/onefuzz/src/blob/url.rs index 02c900cb6..1dd2c2783 100644 --- a/src/agent/onefuzz/src/blob/url.rs +++ b/src/agent/onefuzz/src/blob/url.rs @@ -1,24 +1,28 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
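// `BlobUrl` changes from a URL wrapper struct into a two-variant enum so blob
// consumers can also accept `file://` URLs during local runs; `account()` and
// `container()` now return `Option<String>`, since a local file has neither.
// A minimal caller sketch (assuming the `onefuzz::blob::BlobUrl` API as
// changed below):

use onefuzz::blob::BlobUrl;

fn describe(url: &BlobUrl) -> String {
    match (url.account(), url.container()) {
        // Both parts are present only for real Azure blob URLs.
        (Some(account), Some(container)) => {
            format!("azure blob {}:{}/{}", account, container, url.name())
        }
        // file:// URLs carry no storage account or container.
        _ => format!("local file {}", url.name()),
    }
}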
-use std::fmt; +use std::{fmt, path::PathBuf}; use anyhow::Result; use reqwest::Url; use serde::{de, Serialize, Serializer}; #[derive(Clone, Eq, PartialEq)] -pub struct BlobUrl { - url: Url, +pub enum BlobUrl { + AzureBlob(Url), + LocalFile(PathBuf), } impl BlobUrl { pub fn new(url: Url) -> Result { - if !possible_blob_storage_url(&url, false) { - bail!("Invalid blob URL: {}", url); + if possible_blob_storage_url(&url, false) { + if let Ok(path) = url.to_file_path() { + return Ok(Self::LocalFile(path)); + } else { + return Ok(Self::AzureBlob(url)); + } } - - Ok(Self { url }) + bail!("Invalid blob URL: {}", url) } pub fn parse(url: impl AsRef) -> Result { @@ -28,47 +32,67 @@ impl BlobUrl { } pub fn url(&self) -> Url { - self.url.clone() + match self { + Self::LocalFile(path) => { + Url::from_file_path(path).expect("Could not convert path to url") + } + Self::AzureBlob(url) => url.clone(), + } } - pub fn account(&self) -> String { - // Ctor checks that domain has at least one subdomain. - self.url - .domain() - .unwrap() - .split('.') - .next() - .unwrap() - .to_owned() + pub fn account(&self) -> Option { + match self { + Self::AzureBlob(url) => { + // Ctor checks that domain has at least one subdomain. + Some(url.domain().unwrap().split('.').next().unwrap().to_owned()) + } + Self::LocalFile(_) => None, + } } - pub fn container(&self) -> String { - // Segment existence checked in ctor, so we can unwrap. - self.url.path_segments().unwrap().next().unwrap().to_owned() + pub fn container(&self) -> Option { + match self { + Self::AzureBlob(url) => { + // Segment existence checked in ctor, so we can unwrap. + Some(url.path_segments().unwrap().next().unwrap().to_owned()) + } + Self::LocalFile(_) => None, + } } pub fn name(&self) -> String { let name_segments: Vec<_> = self - .url + .url() .path_segments() .unwrap() - .skip(1) + .skip(match self { + Self::AzureBlob(_) => 1, + _ => 0, + }) .map(|s| s.to_owned()) .collect(); - name_segments.join("/") } } impl fmt::Debug for BlobUrl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", redact_query_sas_sig(self.url())) + write!(f, "{}", redact_query_sas_sig(&self.url())) } } impl fmt::Display for BlobUrl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}:{}/{}", self.account(), self.container(), self.name()) + match self { + Self::AzureBlob(_) => write!( + f, + "{}:{}/{}", + self.account().unwrap_or_default(), + self.container().unwrap_or_default(), + self.name() + ), + Self::LocalFile(path) => write!(f, "{}", path.display()), + } } } @@ -89,30 +113,43 @@ impl BlobContainerUrl { Ok(Self { url }) } + pub fn as_file_path(&self) -> Option { + self.url.to_file_path().ok() + } + pub fn parse(url: impl AsRef) -> Result { let url = Url::parse(url.as_ref())?; Self::new(url) } - pub fn url(&self) -> Url { - self.url.clone() + pub fn url(&self) -> &Url { + &self.url } - pub fn account(&self) -> String { - // Ctor checks that domain has at least one subdomain. - self.url - .domain() - .unwrap() - .split('.') - .next() - .unwrap() - .to_owned() + pub fn account(&self) -> Option { + if self.as_file_path().is_some() { + None + } else { + // Ctor checks that domain has at least one subdomain. + Some( + self.url + .domain() + .unwrap() + .split('.') + .next() + .unwrap() + .to_owned(), + ) + } } - pub fn container(&self) -> String { - // Segment existence checked in ctor, so we can unwrap. 
- self.url.path_segments().unwrap().next().unwrap().to_owned() + pub fn container(&self) -> Option { + if self.as_file_path().is_some() { + None + } else { + Some(self.url.path_segments().unwrap().next().unwrap().to_owned()) + } } pub fn blob(&self, name: impl AsRef) -> BlobUrl { @@ -134,7 +171,13 @@ impl fmt::Debug for BlobContainerUrl { impl fmt::Display for BlobContainerUrl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}:{}", self.account(), self.container()) + if let Some(file_path) = self.as_file_path() { + write!(f, "{:?}", file_path) + } else if let (Some(account), Some(container)) = (self.account(), self.container()) { + write!(f, "{}:{}", account, container) + } else { + panic!("invalid blob url") + } } } @@ -144,7 +187,7 @@ impl From for Url { } } -fn redact_query_sas_sig(url: Url) -> Url { +fn redact_query_sas_sig(url: &Url) -> Url { let mut redacted = url.clone(); redacted.set_query(None); @@ -159,7 +202,10 @@ fn redact_query_sas_sig(url: Url) -> Url { // Weak check of necessary conditions for a storage blob or container URL. fn possible_blob_storage_url(url: &Url, container: bool) -> bool { - // Must use `https` URI scheme. + if url.scheme() == "file" { + return true; + } + if url.scheme() != "https" { return false; } @@ -296,7 +342,6 @@ mod tests { into_urls(&[ // Not valid HTTPS URLs. "data:text/plain,hello", - "file:///a/b/c", // Valid HTTP URLs, but invalid as storage URLs. "https://127.0.0.1", "https://localhost", @@ -344,23 +389,23 @@ mod tests { for url in valid_container_urls() { let url = BlobContainerUrl::new(url).expect("invalid blob container URL"); - - assert_eq!(url.account(), "myaccount"); - assert_eq!(url.container(), "mycontainer"); + assert_eq!(url.account(), Some("myaccount".into())); + assert_eq!(url.container(), Some("mycontainer".into())); } } #[test] fn test_blob_url() { for url in invalid_blob_urls() { + println!("{:?}", url); assert!(BlobUrl::new(url).is_err()); } for url in valid_blob_urls() { let url = BlobUrl::new(url).expect("invalid blob URL"); - assert_eq!(url.account(), "myaccount"); - assert_eq!(url.container(), "mycontainer"); + assert_eq!(url.account(), Some("myaccount".into())); + assert_eq!(url.container(), Some("mycontainer".into())); assert_eq!(url.name(), "myblob"); } } @@ -372,8 +417,8 @@ mod tests { let url = BlobUrl::new(url).expect("invalid blob URL"); - assert_eq!(url.account(), "myaccount"); - assert_eq!(url.container(), "mycontainer"); + assert_eq!(url.account(), Some("myaccount".into())); + assert_eq!(url.container(), Some("mycontainer".into())); assert_eq!(url.name(), "mydir/myblob"); } diff --git a/src/agent/onefuzz/src/expand.rs b/src/agent/onefuzz/src/expand.rs index 48c793fd5..7c73a1211 100644 --- a/src/agent/onefuzz/src/expand.rs +++ b/src/agent/onefuzz/src/expand.rs @@ -14,7 +14,7 @@ pub enum ExpandedValue<'a> { Path(String), Scalar(String), List(&'a [String]), - Mapping(Box, &str) -> Option>>), + Mapping(Box, &str) -> Option> + Send>), } #[derive(PartialEq, Eq, Hash, EnumIter)] diff --git a/src/agent/onefuzz/src/fs.rs b/src/agent/onefuzz/src/fs.rs index 35419354b..fd7e77e28 100644 --- a/src/agent/onefuzz/src/fs.rs +++ b/src/agent/onefuzz/src/fs.rs @@ -3,11 +3,13 @@ use anyhow::{Context, Result}; use futures::stream::StreamExt; -use std::path::{Path, PathBuf}; -#[cfg(target_os = "linux")] +use std::{ + ffi::OsStr, + path::{Path, PathBuf}, +}; + use std::process::Stdio; use tokio::fs; -#[cfg(target_os = "linux")] use tokio::process::Command; const ONEFUZZ_ROOT_ENV: &str = "ONEFUZZ_ROOT"; @@ -170,6 
+172,127 @@ impl OwnedDir { } } +#[cfg(target_os = "linux")] +pub async fn sync_impl( + src: impl AsRef, + dst: impl AsRef, + delete_dst: bool, + recursive: bool, +) -> Result<()> { + let mut cmd = Command::new("rsync"); + cmd.kill_on_drop(true) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .arg(if recursive { "-zhr" } else { "-zh" }); + + if delete_dst { + cmd.arg("--delete"); + } + cmd.arg(&src).arg(&dst); + + let output = cmd + .spawn() + .context("rsync failed to start")? + .wait_with_output() + .await + .context("rsync failed to run")?; + + if !output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "sync failed src:{:?} dst:{:?} stdout:{:?} stderr:{:?}", + src.as_ref(), + dst.as_ref(), + stdout, + stderr + ); + } + + Ok(()) +} + +#[cfg(target_os = "windows")] +pub async fn sync_impl( + src: impl AsRef, + dst: impl AsRef, + delete_dst: bool, + recursive: bool, +) -> Result<()> { + let mut cmd = Command::new("robocopy"); + cmd.kill_on_drop(true) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .arg(&src) + .arg(&dst); + + if recursive { + cmd.arg("/e"); + } + + if delete_dst { + cmd.arg("/purge"); + } + + let output = cmd + .spawn() + .context("robocopy failed to start")? + .wait_with_output() + .await + .context("robocopy failed to run")?; + + if let Some(code) = output.status.code() { + // any value >= 8 indicates that there was at least one failure during the copy operation + // https://ss64.com/nt/robocopy-exit.html + if code >= 8 { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + anyhow::bail!( + "sync failed src:{:?} dst:{:?} stdout:{:?} stderr:{:?} exist Status {:?}", + src.as_ref(), + dst.as_ref(), + stdout, + stderr, + output.status + ); + } + } + + Ok(()) +} + +pub struct SyncPath { + path: PathBuf, +} + +impl SyncPath { + pub fn dir(path: impl AsRef) -> SyncPath { + // adding a trailing to indicate that the path is a folder + // linux requires this for copy/sync operations to work as expected + let path = path.as_ref().join(""); + Self { path } + } + pub fn file(path: impl AsRef) -> SyncPath { + let path = path.as_ref().into(); + Self { path } + } +} + +impl AsRef for SyncPath { + fn as_ref(&self) -> &OsStr { + self.path.as_os_str() + } +} + +pub async fn sync(src: SyncPath, dst: SyncPath, delete_dst: bool) -> Result<()> { + sync_impl(src, dst, delete_dst, true).await +} + +pub async fn copy(src: SyncPath, dst: SyncPath, recur: bool) -> Result<()> { + sync_impl(src, dst, false, recur).await +} + #[cfg(test)] mod tests { use tempfile::tempdir; diff --git a/src/agent/onefuzz/src/heartbeat.rs b/src/agent/onefuzz/src/heartbeat.rs index dd76ea00e..c1933a4a0 100644 --- a/src/agent/onefuzz/src/heartbeat.rs +++ b/src/agent/onefuzz/src/heartbeat.rs @@ -61,7 +61,7 @@ where where Fut: Future + Send, { - let queue_client = Arc::new(QueueClient::new(queue_url)); + let queue_client = Arc::new(QueueClient::new(queue_url)?); flush(queue_client.clone(), messages.clone()).await; while !cancelled.is_notified(heartbeat_period).await { flush(queue_client.clone(), messages.clone()).await; @@ -75,7 +75,7 @@ where queue_url: Url, heartbeat_period: Option, flush: F, - ) -> HeartbeatClient + ) -> Result> where F: Fn(Arc>) -> Fut + Sync + Send + 'static, Fut: Future + Send, @@ -86,7 +86,7 @@ where let context = Arc::new(HeartbeatContext { state: context, - queue_client: QueueClient::new(queue_url), + queue_client: 
QueueClient::new(queue_url)?, pending_messages: Mutex::new(HashSet::::new()), cancelled: Notify::new(), }); @@ -102,9 +102,9 @@ where Ok(()) }); - HeartbeatClient { + Ok(HeartbeatClient { context, heartbeat_process, - } + }) } } diff --git a/src/agent/onefuzz/src/lib.rs b/src/agent/onefuzz/src/lib.rs index 548e61959..91bdded9b 100644 --- a/src/agent/onefuzz/src/lib.rs +++ b/src/agent/onefuzz/src/lib.rs @@ -11,6 +11,7 @@ extern crate lazy_static; extern crate onefuzz_telemetry; pub mod asan; +pub mod auth; pub mod az_copy; pub mod blob; pub mod env; diff --git a/src/agent/onefuzz/src/monitor.rs b/src/agent/onefuzz/src/monitor.rs index 517b82875..cae122ecc 100644 --- a/src/agent/onefuzz/src/monitor.rs +++ b/src/agent/onefuzz/src/monitor.rs @@ -9,6 +9,7 @@ use futures::{ task::{self, Poll}, }; use notify::{DebouncedEvent, Watcher}; +use std::sync::mpsc::TryRecvError; pub struct DirectoryMonitor { dir: PathBuf, @@ -47,15 +48,9 @@ impl DirectoryMonitor { self.watcher.unwatch(self.dir.clone())?; Ok(()) } -} -impl Stream for DirectoryMonitor { - type Item = PathBuf; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut task::Context) -> Poll> { - use std::sync::mpsc::TryRecvError; - - let poll = match self.rx.try_recv() { + pub fn poll_file(&mut self) -> Poll> { + match self.rx.try_recv() { Ok(DebouncedEvent::Create(path)) => Poll::Ready(Some(path)), Ok(DebouncedEvent::Remove(path)) => { if path == self.dir { @@ -80,10 +75,16 @@ impl Stream for DirectoryMonitor { self.stop().ok(); Poll::Ready(None) } - }; + } + } +} +impl Stream for DirectoryMonitor { + type Item = PathBuf; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut task::Context) -> Poll> { + let poll = self.poll_file(); cx.waker().wake_by_ref(); - poll } } diff --git a/src/agent/onefuzz/src/syncdir.rs b/src/agent/onefuzz/src/syncdir.rs index c666af97e..2858ab919 100644 --- a/src/agent/onefuzz/src/syncdir.rs +++ b/src/agent/onefuzz/src/syncdir.rs @@ -2,12 +2,19 @@ // Licensed under the MIT License. 
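// `SyncedDir.url` stops being an `Option`: a local run now expresses "no
// remote container" as a `file://` container URL instead of `None`, which is
// why the tests earlier in this diff wrap temp directories with
// `Url::from_directory_path`. A minimal sketch of building a locally backed
// SyncedDir (helper name and error handling are illustrative):

use onefuzz::blob::BlobContainerUrl;
use onefuzz::syncdir::SyncedDir;
use reqwest::Url;
use std::path::Path;

fn local_synced_dir(local: &Path, backing: &Path) -> anyhow::Result<SyncedDir> {
    // from_directory_path fails on relative paths, so surface that as an error.
    let url = Url::from_directory_path(backing)
        .map_err(|_| anyhow::anyhow!("backing path must be an absolute directory"))?;
    Ok(SyncedDir {
        path: local.to_path_buf(),
        url: BlobContainerUrl::parse(url)?,
    })
}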
use crate::{ - az_copy, blob::BlobContainerUrl, jitter::delay_with_jitter, monitor::DirectoryMonitor, + az_copy, + blob::{BlobClient, BlobContainerUrl}, + fs::{exists, sync, SyncPath}, + jitter::delay_with_jitter, + monitor::DirectoryMonitor, uploader::BlobUploader, }; use anyhow::{Context, Result}; use futures::stream::StreamExt; use onefuzz_telemetry::{Event, EventData}; +use reqwest::StatusCode; +use reqwest_retry::SendRetry; +use serde::{Deserialize, Serialize}; use std::{path::PathBuf, str, time::Duration}; use tokio::fs; @@ -20,35 +27,48 @@ pub enum SyncOperation { const DELAY: Duration = Duration::from_secs(10); const DEFAULT_CONTINUOUS_SYNC_DELAY_SECONDS: u64 = 60; -#[derive(Debug, Deserialize, Clone, PartialEq, Default)] +#[derive(Debug, Deserialize, Clone, PartialEq)] pub struct SyncedDir { pub path: PathBuf, - pub url: Option, + pub url: BlobContainerUrl, } impl SyncedDir { pub async fn sync(&self, operation: SyncOperation, delete_dst: bool) -> Result<()> { - if self.url.is_none() { - debug!("not syncing as SyncedDir is missing remote URL"); - return Ok(()); - } - - let dir = &self.path; - let url = self.url.as_ref().unwrap().url(); - let url = url.as_ref(); - debug!("syncing {:?} {}", operation, dir.display()); - match operation { - SyncOperation::Push => az_copy::sync(dir, url, delete_dst).await, - SyncOperation::Pull => az_copy::sync(url, dir, delete_dst).await, + let dir = &self.path.join(""); + if let Some(dest) = self.url.as_file_path() { + debug!("syncing {:?} {}", operation, dest.display()); + match operation { + SyncOperation::Push => { + sync( + SyncPath::dir(dir), + SyncPath::dir(dest.as_path()), + delete_dst, + ) + .await + } + SyncOperation::Pull => { + sync( + SyncPath::dir(dest.as_path()), + SyncPath::dir(dir), + delete_dst, + ) + .await + } + } + } else { + let url = self.url.url(); + let url = url.as_ref(); + debug!("syncing {:?} {}", operation, dir.display()); + match operation { + SyncOperation::Push => az_copy::sync(dir, url, delete_dst).await, + SyncOperation::Pull => az_copy::sync(url, dir, delete_dst).await, + } } } pub fn try_url(&self) -> Result<&BlobContainerUrl> { - let url = match &self.url { - Some(x) => x, - None => bail!("missing URL context"), - }; - Ok(url) + Ok(&self.url) } pub async fn init_pull(&self) -> Result<()> { @@ -57,6 +77,10 @@ impl SyncedDir { } pub async fn init(&self) -> Result<()> { + if let Some(remote_path) = self.url.as_file_path() { + fs::create_dir_all(remote_path).await?; + } + match fs::metadata(&self.path).await { Ok(m) => { if m.is_dir() { @@ -84,11 +108,6 @@ impl SyncedDir { operation: SyncOperation, delay_seconds: Option, ) -> Result<()> { - if self.url.is_none() { - debug!("not continuously syncing, as SyncDir does not have a remote URL"); - return Ok(()); - } - let delay_seconds = delay_seconds.unwrap_or(DEFAULT_CONTINUOUS_SYNC_DELAY_SECONDS); if delay_seconds == 0 { return Ok(()); @@ -101,27 +120,90 @@ impl SyncedDir { } } + // Conditionally upload a report, if it would not be a duplicate. + pub async fn upload(&self, name: &str, data: &T) -> Result { + match self.url.as_file_path() { + Some(path) => { + let path = path.join(name); + if !exists(&path).await? { + let data = serde_json::to_vec(&data)?; + fs::write(path, data).await?; + Ok(true) + } else { + Ok(false) + } + } + None => { + let url = self.url.blob(name).url(); + let blob = BlobClient::new(); + let result = blob + .put(url.clone()) + .json(data) + // Conditional PUT, only if-not-exists. 
+ // https://docs.microsoft.com/en-us/rest/api/storageservices/specifying-conditional-headers-for-blob-service-operations + .header("If-None-Match", "*") + .send_retry_default() + .await + .context("Uploading blob")?; + + Ok(result.status() == StatusCode::CREATED) + } + } + } + async fn file_monitor_event(&self, event: Event) -> Result<()> { debug!("monitoring {}", self.path.display()); let mut monitor = DirectoryMonitor::new(self.path.clone()); monitor.start()?; - let mut uploader = self.url.as_ref().map(|x| BlobUploader::new(x.url())); + if let Some(path) = self.url.as_file_path() { + fs::create_dir_all(&path).await?; + + while let Some(item) = monitor.next().await { + event!(event.clone(); EventData::Path = item.display().to_string()); + let file_name = item + .file_name() + .ok_or_else(|| anyhow!("invalid file path"))?; + let destination = path.join(file_name); + if let Err(err) = fs::copy(&item, &destination).await { + let error_message = format!( + "Couldn't upload file. path:{:?} dir:{:?} err:{}", + item, destination, err + ); + + if !item.exists() { + // guarding against cases where a temporary file was detected + // but was deleted before the copy + warn!("{}", error_message); + continue; + } + bail!("{}", error_message); + } + } + } else { + let mut uploader = BlobUploader::new(self.url.url().clone()); + + while let Some(item) = monitor.next().await { + event!(event.clone(); EventData::Path = item.display().to_string()); - while let Some(item) = monitor.next().await { - event!(event.clone(); EventData::Path = item.display().to_string()); - if let Some(uploader) = &mut uploader { if let Err(err) = uploader.upload(item.clone()).await { - bail!( + let error_message = format!( "Couldn't upload file. path:{} dir:{} err:{}", item.display(), self.path.display(), err ); + + if !item.exists() { + // guarding against cases where a temporary file was detected + // but was deleted before the upload + warn!("{}", error_message); + continue; + } + bail!("{}", error_message); } } } - Ok(()) } @@ -150,29 +232,11 @@ impl SyncedDir { } } -impl From for SyncedDir { - fn from(path: PathBuf) -> Self { - Self { path, url: None } - } -} - pub async fn continuous_sync( dirs: &[SyncedDir], operation: SyncOperation, delay_seconds: Option, ) -> Result<()> { - let mut should_loop = false; - for dir in dirs { - if dir.url.is_some() { - should_loop = true; - break; - } - } - if !should_loop { - debug!("not syncing as SyncDirs do not have remote URLs"); - return Ok(()); - } - let delay_seconds = delay_seconds.unwrap_or(DEFAULT_CONTINUOUS_SYNC_DELAY_SECONDS); if delay_seconds == 0 { return Ok(()); diff --git a/src/agent/onefuzz/src/utils.rs b/src/agent/onefuzz/src/utils.rs index d610702fb..17e257b2b 100644 --- a/src/agent/onefuzz/src/utils.rs +++ b/src/agent/onefuzz/src/utils.rs @@ -2,6 +2,7 @@ // Licensed under the MIT License. use crate::jitter::delay_with_jitter; +use anyhow::Result; use async_trait::async_trait; use std::time::Duration; @@ -20,3 +21,136 @@ impl CheckNotify for tokio::sync::Notify { } } } + +/// wait on all join handles until they all return a success value or +/// the first failure. 
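///
/// A usage sketch (the spawned task here is illustrative):
///
/// ```ignore
/// let handle: tokio::task::JoinHandle<anyhow::Result<()>> =
///     tokio::spawn(async { Ok(()) });
/// try_wait_all_join_handles(vec![handle]).await?;
/// ```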
+pub async fn try_wait_all_join_handles( + handles: Vec>>, +) -> Result<()> { + let mut tasks = handles; + loop { + let (result, _, remaining_tasks) = futures::future::select_all(tasks).await; + result??; + + if remaining_tasks.is_empty() { + return Ok(()); + } else { + tasks = remaining_tasks + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use anyhow::Result; + use futures::*; + use std::sync::Arc; + use tokio::{spawn, sync::Notify, task::JoinHandle, time::delay_for}; + + fn spawn_ok() -> (Arc, JoinHandle>) { + let notify = Arc::new(Notify::new()); + + let notify_clone = notify.clone(); + let handle = spawn(async move { + notify_clone.notified().await; + Ok(()) + }); + (notify, handle) + } + + fn spawn_err() -> (Arc, JoinHandle>) { + let notify = Arc::new(Notify::new()); + + let notify_clone = notify.clone(); + let handle = spawn(async move { + notify_clone.notified().await; + bail!("error") + }); + (notify, handle) + } + + #[tokio::test] + async fn test_pending_when_no_return() { + let (_notify1, handle1) = spawn_ok(); + let (_notify2, handle2) = spawn_ok(); + let (_notify3, handle3) = spawn_ok(); + + let try_wait_handle = try_wait_all_join_handles(vec![handle1, handle2, handle3]); + delay_for(Duration::from_secs(1)).await; + assert!( + try_wait_handle.now_or_never().is_none(), + "expected no result" + ); + } + + #[tokio::test] + async fn test_pending_when_some_return() { + let (notify1, handle1) = spawn_ok(); + let (notify2, handle2) = spawn_ok(); + let (_notify3, handle3) = spawn_ok(); + + let try_wait_handle = try_wait_all_join_handles(vec![handle1, handle2, handle3]); + + notify1.notify(); + notify2.notify(); + delay_for(Duration::from_secs(1)).await; + assert!( + try_wait_handle.now_or_never().is_none(), + "expected no result" + ); + } + + #[tokio::test] + async fn test_ready_when_all_return() { + let (notify1, handle1) = spawn_ok(); + let (notify2, handle2) = spawn_ok(); + let (notify3, handle3) = spawn_ok(); + + let try_wait_handle = try_wait_all_join_handles(vec![handle1, handle2, handle3]); + + notify1.notify(); + notify2.notify(); + notify3.notify(); + delay_for(Duration::from_secs(1)).await; + if let Some(result) = try_wait_handle.now_or_never() { + assert!(result.is_ok(), "expected Ok") + } else { + assert!(false, "expected result") + } + } + + #[tokio::test] + async fn test_pending_on_no_failure() { + let (notify1, handle1) = spawn_ok(); + let (_notify2, handle2) = spawn_err(); + let (_notify3, handle3) = spawn_ok(); + + let try_wait_handle = try_wait_all_join_handles(vec![handle1, handle2, handle3]); + + notify1.notify(); + delay_for(Duration::from_secs(1)).await; + assert!( + try_wait_handle.now_or_never().is_none(), + "expected no result" + ); + } + + #[tokio::test] + async fn test_pending_on_first_failure() { + let (_notify1, handle1) = spawn_ok(); + let (notify2, handle2) = spawn_err(); + let (_notify3, handle3) = spawn_ok(); + + let try_wait_handle = try_wait_all_join_handles(vec![handle1, handle2, handle3]); + + notify2.notify(); + + delay_for(Duration::from_secs(1)).await; + if let Some(result) = try_wait_handle.now_or_never() { + assert!(result.is_err(), "expected error") + } else { + assert!(false, "expected result") + } + } +} diff --git a/src/agent/reqwest-retry/Cargo.toml b/src/agent/reqwest-retry/Cargo.toml index 1f90de7d7..ec734a8d3 100644 --- a/src/agent/reqwest-retry/Cargo.toml +++ b/src/agent/reqwest-retry/Cargo.toml @@ -9,7 +9,8 @@ license = "MIT" anyhow = "1.0" async-trait = "0.1" reqwest = { version = "0.10", features = ["json", "stream"] } 
-backoff = { version = "0.2", features = ["async-std"] } +backoff = { version = "0.3", features = ["async-std"] } +log = "0.4" onefuzz-telemetry = { path = "../onefuzz-telemetry" } [dev-dependencies] diff --git a/src/agent/reqwest-retry/src/lib.rs b/src/agent/reqwest-retry/src/lib.rs index 1cf64c9ec..3ca7d914c 100644 --- a/src/agent/reqwest-retry/src/lib.rs +++ b/src/agent/reqwest-retry/src/lib.rs @@ -3,7 +3,7 @@ use anyhow::{format_err, Result}; use async_trait::async_trait; -use backoff::{self, future::FutureOperation, ExponentialBackoff}; +use backoff::{self, future::retry_notify, ExponentialBackoff}; use onefuzz_telemetry::warn; use reqwest::Response; use std::{ @@ -43,16 +43,16 @@ pub async fn send_retry_reqwest Result + Sen Ok(response) } }; - let result = op - .retry_notify( - ExponentialBackoff { - current_interval: retry_period, - initial_interval: retry_period, - ..ExponentialBackoff::default() - }, - |err, dur| warn!("request attempt failed after {:?}: {}", dur, err), - ) - .await?; + let result = retry_notify( + ExponentialBackoff { + current_interval: retry_period, + initial_interval: retry_period, + ..ExponentialBackoff::default() + }, + op, + |err, dur| warn!("request attempt failed after {:?}: {}", dur, err), + ) + .await?; Ok(result) } diff --git a/src/agent/storage-queue/Cargo.toml b/src/agent/storage-queue/Cargo.toml index 3ddcd9ce1..5295fc9ff 100644 --- a/src/agent/storage-queue/Cargo.toml +++ b/src/agent/storage-queue/Cargo.toml @@ -8,12 +8,19 @@ license = "MIT" [dependencies] anyhow = "1.0" async-trait = "0.1" +backoff = { version = "0.3", features = ["async-std"] } +base64 = "0.13" +bytes = "0.5" +derivative = "2.2.0" +regex = "1.4" reqwest = { version = "0.10", features = ["json", "stream"] } -serde = "1.0" +reqwest-retry = { path = "../reqwest-retry" } +serde = { version = "1.0", features = ["derive"]} +serde_derive = "1.0" serde_json = "1.0" serde-xml-rs = "0.4" -serde_derive = "1.0" +tokio = { version = "0.2", features = ["full"] } +queue-file = "1.1" uuid = { version = "0.8", features = ["serde", "v4"] } -regex = "1.4" -base64 = "0.13" -reqwest-retry = { path = "../reqwest-retry" } +yaque = "0.5.1" + diff --git a/src/agent/storage-queue/src/azure_queue.rs b/src/agent/storage-queue/src/azure_queue.rs new file mode 100644 index 000000000..4858a99ef --- /dev/null +++ b/src/agent/storage-queue/src/azure_queue.rs @@ -0,0 +1,167 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
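// storage-queue grows pluggable backends: the REST-based Azure queue in this
// new file, plus file- and channel-backed local queues, unified behind the
// `QueueClient` enum in lib.rs. Azure message bodies travel base64-encoded
// inside an XML envelope, which is why `AzureQueueMessage::parse` decodes
// before handing bytes to the caller. A minimal consumer sketch (assuming the
// `QueueClient`/`Message` surface used by the tasks above; the queue URL is
// whatever the caller was given):

use anyhow::Result;
use reqwest::Url;
use storage_queue::QueueClient;

async fn drain_one(url: Url) -> Result<()> {
    let queue = QueueClient::new(url)?;
    if let Some(msg) = queue.pop().await? {
        // The parser closure sees the already base64-decoded payload.
        let body: String =
            msg.parse(|data| Ok(std::str::from_utf8(data)?.to_owned()))?;
        println!("got {}", body);
        // Deleting the message is what commits the pop.
        msg.delete().await?;
    }
    Ok(())
}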
diff --git a/src/agent/storage-queue/Cargo.toml b/src/agent/storage-queue/Cargo.toml
index 3ddcd9ce1..5295fc9ff 100644
--- a/src/agent/storage-queue/Cargo.toml
+++ b/src/agent/storage-queue/Cargo.toml
@@ -8,12 +8,19 @@ license = "MIT"
 [dependencies]
 anyhow = "1.0"
 async-trait = "0.1"
+backoff = { version = "0.3", features = ["async-std"] }
+base64 = "0.13"
+bytes = "0.5"
+derivative = "2.2.0"
+regex = "1.4"
 reqwest = { version = "0.10", features = ["json", "stream"] }
-serde = "1.0"
+reqwest-retry = { path = "../reqwest-retry" }
+serde = { version = "1.0", features = ["derive"]}
+serde_derive = "1.0"
 serde_json = "1.0"
 serde-xml-rs = "0.4"
-serde_derive = "1.0"
+tokio = { version = "0.2", features = ["full"] }
+queue-file = "1.1"
 uuid = { version = "0.8", features = ["serde", "v4"] }
-regex = "1.4"
-base64 = "0.13"
-reqwest-retry = { path = "../reqwest-retry" }
+yaque = "0.5.1"
+
diff --git a/src/agent/storage-queue/src/azure_queue.rs b/src/agent/storage-queue/src/azure_queue.rs
new file mode 100644
index 000000000..4858a99ef
--- /dev/null
+++ b/src/agent/storage-queue/src/azure_queue.rs
@@ -0,0 +1,167 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use anyhow::{Context, Result};
+use bytes::buf::BufExt;
+use reqwest::{Client, Url};
+use reqwest_retry::SendRetry;
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
+use std::time::Duration;
+use uuid::Uuid;
+
+pub const EMPTY_QUEUE_DELAY: Duration = Duration::from_secs(10);
+
+// <QueueMessagesList>
+//     <QueueMessage>
+//         <MessageId>7d35e47d-f58e-42da-ba4a-9e6ac7e1214d</MessageId>
+//         <InsertionTime>Fri, 05 Feb 2021 06:27:47 GMT</InsertionTime>
+//         <ExpirationTime>Fri, 12 Feb 2021 06:27:47 GMT</ExpirationTime>
+//         <PopReceipt>AgAAAAMAAAAAAAAAtg40eYj71gE=</PopReceipt>
+//         <TimeNextVisible>Fri, 05 Feb 2021 06:31:02 GMT</TimeNextVisible>
+//         <DequeueCount>1</DequeueCount>
+//         <MessageText>dGVzdA==</MessageText>
+//     </QueueMessage>
+// </QueueMessagesList>
+
+// #[derive(Derivative)]
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+#[serde(rename = "QueueMessage")]
+pub struct AzureQueueMessage {
+    pub message_id: Uuid,
+    // InsertionTime:
+    // ExpirationTime
+    pub pop_receipt: String,
+    // TimeNextVisible
+    // DequeueCount
+    pub message_text: String,
+
+    #[serde(skip)]
+    pub messages_url: Option<Url>,
+}
+
+impl AzureQueueMessage {
+    pub fn parse<T>(&self, parser: impl FnOnce(&[u8]) -> Result<T>) -> Result<T> {
+        let decoded = base64::decode(&self.message_text)?;
+        parser(&decoded)
+    }
+
+    pub async fn claim<T: DeserializeOwned>(self) -> Result<T> {
+        if let Some(messages_url) = self.messages_url {
+            let messages_path = messages_url.path();
+            let item_path = format!("{}/{}", messages_path, self.message_id);
+            let mut url = messages_url.clone();
+            url.set_path(&item_path);
+            url.query_pairs_mut()
+                .append_pair("popreceipt", &self.pop_receipt);
+
+            let http = Client::new();
+            http.delete(url)
+                .send_retry_default()
+                .await?
+                .error_for_status()?;
+        }
+        let decoded = base64::decode(self.message_text)?;
+        let value: T = serde_json::from_slice(&decoded)?;
+        Ok(value)
+    }
+
+    pub async fn delete(&self) -> Result<()> {
+        if let Some(messages_url) = self.messages_url.clone() {
+            let messages_path = messages_url.path();
+            let item_path = format!("{}/{}", messages_path, self.message_id);
+            let mut url = messages_url.clone();
+            url.set_path(&item_path);
+            url.query_pairs_mut()
+                .append_pair("popreceipt", &self.pop_receipt);
+
+            let http = Client::new();
+            http.delete(url)
+                .send_retry_default()
+                .await
+                .context("storage queue delete failed")?
+                .error_for_status()
+                .context("storage queue delete failed")?;
+        }
+
+        Ok(())
+    }
+
+    pub fn get<T: DeserializeOwned>(&self) -> Result<T> {
+        let decoded = base64::decode(&self.message_text)?;
+        let value = serde_json::from_slice(&decoded)?;
+        Ok(value)
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+#[serde(rename = "QueueMessagesList")]
+struct AzureQueueMessageList {
+    #[serde(rename = "QueueMessage", default)]
+    pub queue_message: Option<AzureQueueMessage>,
+}
+
+#[derive(Debug, Clone)]
+pub struct AzureQueueClient {
+    pub http: Client,
+    pub messages_url: Url,
+}
+
+impl AzureQueueClient {
+    pub fn new(queue_url: Url) -> Self {
+        let http = Client::new();
+        let messages_url = {
+            let queue_path = queue_url.path();
+            let messages_path = format!("{}/messages", queue_path);
+            let mut url = queue_url;
+            url.set_path(&messages_path);
+            url
+        };
+        AzureQueueClient { http, messages_url }
+    }
+
+    pub async fn enqueue(&self, data: impl Serialize) -> Result<()> {
+        let serialized = serde_json::to_string(&data).unwrap();
+        let body = serde_xml_rs::to_string(&base64::encode(&serialized)).unwrap();
+        let r = self
+            .http
+            .post(self.messages_url.clone())
+            .body(body)
+            .send_retry_default()
+            .await
+            .context("storage queue enqueue failed")?;
+        let _ = r
+            .error_for_status()
+            .context("storage queue enqueue failed with error")?;
+        Ok(())
+    }
+
+    pub async fn pop(&self) -> Result<Option<AzureQueueMessage>> {
+        let response = self
+            .http
+            .get(self.messages_url.clone())
+            .send_retry_default()
+            .await
+            .context("storage queue pop failed")?
+            .error_for_status()
+            .context("storage queue pop failed with error")?;
+
+        let buf = {
+            let buf = response.bytes().await?;
+            // remove the byte order mark if present
+            if buf.slice(0..3).to_vec() == [0xef, 0xbb, 0xbf] {
+                buf.slice(3..)
+            } else {
+                buf
+            }
+        };
+
+        let msg: AzureQueueMessageList = serde_xml_rs::from_reader(buf.reader())?;
+
+        let m = msg.queue_message.map(|msg| AzureQueueMessage {
+            messages_url: Some(self.messages_url.clone()),
+            ..msg
+        });
+        Ok(m)
+    }
+}
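
A sketch of using the client above end to end (a sketch under stated assumptions: the queue URL is a hypothetical SAS-authorized URL, the runtime is tokio 0.2, and payloads are JSON, matching the base64+JSON encoding used by enqueue and claim):

use anyhow::Result;
use reqwest::Url;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Job {
    id: u64,
}

async fn roundtrip() -> Result<()> {
    let url = Url::parse("https://myaccount.queue.core.windows.net/myqueue")?;
    let client = AzureQueueClient::new(url);
    client.enqueue(Job { id: 1 }).await?;
    // pop returns Ok(None) when the queue is empty
    if let Some(msg) = client.pop().await? {
        // claim deletes the message by pop receipt, then decodes its payload
        let job: Job = msg.claim().await?;
        println!("claimed job {}", job.id);
    }
    Ok(())
}
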
diff --git a/src/agent/storage-queue/src/lib.rs b/src/agent/storage-queue/src/lib.rs
index facc2b426..1b1e6c8d0 100644
--- a/src/agent/storage-queue/src/lib.rs
+++ b/src/agent/storage-queue/src/lib.rs
@@ -1,200 +1,139 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.

-use anyhow::{bail, Context, Result};
-use reqwest::{Client, Url};
-use reqwest_retry::SendRetry;
-use serde::{Deserialize, Serialize};
+use anyhow::{anyhow, Result};
+use reqwest::Url;
+use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize};
 use std::time::Duration;
 use uuid::Uuid;

 pub const EMPTY_QUEUE_DELAY: Duration = Duration::from_secs(10);
+pub mod azure_queue;
+pub mod local_queue;

-#[derive(Debug, Deserialize, Serialize)]
-#[serde(rename_all = "PascalCase")]
-struct QueueMessage {
-    message_text: Option<String>,
+use azure_queue::{AzureQueueClient, AzureQueueMessage};
+use local_queue::{ChannelQueueClient, FileQueueClient, LocalQueueMessage};
+
+#[derive(Debug, Clone)]
+pub enum QueueClient {
+    AzureQueue(AzureQueueClient),
+    FileQueueClient(Box<FileQueueClient>),
+    Channel(ChannelQueueClient),
 }

-pub struct QueueClient {
-    http: Client,
-    messages_url: Url,
+impl<'de> Deserialize<'de> for QueueClient {
+    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        Url::deserialize(deserializer)
+            .map(QueueClient::new)?
+            .map_err(serde::de::Error::custom)
+    }
 }

 impl QueueClient {
-    pub fn new(queue_url: Url) -> Self {
-        let http = Client::new();
+    pub fn new(queue_url: Url) -> Result<Self> {
+        if queue_url.scheme().to_lowercase() == "file" {
+            let path = queue_url
+                .to_file_path()
+                .map_err(|_| anyhow!("invalid local path"))?;
+            let local_queue = FileQueueClient::new(path)?;
+            Ok(QueueClient::FileQueueClient(Box::new(local_queue)))
+        } else {
+            Ok(QueueClient::AzureQueue(AzureQueueClient::new(queue_url)))
+        }
+    }

-        let messages_url = {
-            let queue_path = queue_url.path();
-            let messages_path = format!("{}/messages", queue_path);
-            let mut url = queue_url;
-            url.set_path(&messages_path);
-            url
-        };
-
-        Self { http, messages_url }
+    pub fn get_url(self) -> Result<Url> {
+        match self {
+            QueueClient::AzureQueue(queue_client) => Ok(queue_client.messages_url),
+            QueueClient::FileQueueClient(queue_client) => {
+                Url::from_file_path(queue_client.as_ref().path.clone())
+                    .map_err(|_| anyhow!("invalid queue url"))
+            }
+            QueueClient::Channel(queue_client) => Ok(queue_client.url),
+        }
     }

     pub async fn enqueue(&self, data: impl Serialize) -> Result<()> {
-        let serialized = serde_json::to_string(&data).unwrap();
-        let queue_message = QueueMessage {
-            message_text: Some(base64::encode(&serialized)),
-        };
-        let body = serde_xml_rs::to_string(&queue_message).unwrap();
-        let r = self
-            .http
-            .post(self.messages_url())
-            .body(body)
-            .send_retry_default()
-            .await
-            .context("storage queue enqueue failed")?;
-        let _ = r
-            .error_for_status()
-            .context("storage queue enqueue failed with error")?;
-        Ok(())
+        match self {
+            QueueClient::AzureQueue(queue_client) => queue_client.enqueue(data).await,
+            QueueClient::FileQueueClient(queue_client) => queue_client.enqueue(data).await,
+            QueueClient::Channel(queue_client) => queue_client.enqueue(data).await,
+        }
     }

-    pub async fn pop(&mut self) -> Result<Option<Message>> {
-        let response = self
-            .http
-            .get(self.messages_url())
-            .send_retry_default()
-            .await
-            .context("storage queue pop failed")?
-            .error_for_status()
-            .context("storage queue pop failed with error")?;
-        let text = response
-            .text()
-            .await
-            .context("unable to parse response text")?;
-        let msg = Message::parse(&text);
-
-        let msg = if let Some(msg) = msg {
-            msg
-        } else {
-            if is_empty_message(&text) {
-                return Ok(None);
+    pub async fn pop(&self) -> Result<Option<Message>> {
+        match self {
+            QueueClient::AzureQueue(queue_client) => {
+                let message = queue_client.pop().await?;
+                Ok(message.map(Message::QueueMessage))
             }
-            bail!("unable to parse response text body: {}", text);
-        };
-
-        let msg = if msg.data.is_empty() { None } else { Some(msg) };
-
-        Ok(msg)
-    }
-
-    pub async fn delete(&mut self, receipt: impl Into<Receipt>) -> Result<()> {
-        let receipt = receipt.into();
-        let url = self.delete_url(receipt);
-        self.http
-            .delete(url)
-            .send_retry_default()
-            .await
-            .context("storage queue delete failed")?
-            .error_for_status()
-            .context("storage queue delete failed")?;
-        Ok(())
-    }
-
-    fn delete_url(&self, receipt: Receipt) -> Url {
-        let messages_url = self.messages_url();
-        let messages_path = messages_url.path();
-        let item_path = format!("{}/{}", messages_path, receipt.message_id);
-        let mut url = messages_url;
-        url.set_path(&item_path);
-        url.query_pairs_mut()
-            .append_pair("popreceipt", &receipt.pop_receipt);
-        url
-    }
-
-    fn messages_url(&self) -> Url {
-        self.messages_url.clone()
+            QueueClient::FileQueueClient(queue_client) => {
+                let message = queue_client.pop().await?;
+                Ok(message.map(Message::LocalQueueMessage))
+            }
+            QueueClient::Channel(queue_client) => {
+                let message = queue_client.pop().await?;
+                Ok(message.map(Message::LocalQueueMessage))
+            }
+        }
     }
 }
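
The scheme dispatch above means a file:// URL transparently selects the file-backed queue, while any other URL is treated as an Azure queue. A sketch (the path is hypothetical; assumes tokio 0.2):

use anyhow::Result;
use reqwest::Url;
use serde::Serialize;

#[derive(Serialize)]
struct Ping {
    seq: u32,
}

async fn local_queue_example() -> Result<()> {
    // file:// scheme -> QueueClient::FileQueueClient; an https:// queue URL
    // would have produced QueueClient::AzureQueue instead
    let queue = QueueClient::new(Url::parse("file:///tmp/my-local-queue")?)?;
    queue.enqueue(Ping { seq: 1 }).await?;
    if let Some(msg) = queue.pop().await? {
        println!("popped: {:?}", msg);
        msg.delete().await?;
    }
    Ok(())
}
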
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Receipt {
-    // Unique ID of the associated queue message.
-    pub message_id: Uuid,
-
-    // Opaque data that licenses message deletion.
-    pub pop_receipt: String,
-}
-
-impl From<Message> for Receipt {
-    fn from(msg: Message) -> Self {
-        msg.receipt
-    }
-}
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Message {
-    pub receipt: Receipt,
-    pub data: Vec<u8>,
+#[derive(Debug)]
+pub enum Message {
+    QueueMessage(AzureQueueMessage),
+    LocalQueueMessage(LocalQueueMessage),
 }

 impl Message {
+    pub fn get<T: DeserializeOwned>(&self) -> Result<T> {
+        match self {
+            Message::QueueMessage(message) => {
+                let data = message.get()?;
+                Ok(data)
+            }
+            Message::LocalQueueMessage(message) => Ok(serde_json::from_slice(&*message.data)?),
+        }
+    }
+
+    pub async fn claim<T: DeserializeOwned>(self) -> Result<T> {
+        match self {
+            Message::QueueMessage(message) => Ok(message.claim().await?),
+            Message::LocalQueueMessage(message) => Ok(serde_json::from_slice(&message.data)?),
+        }
+    }
+
+    pub async fn delete(&self) -> Result<()> {
+        match self {
+            Message::QueueMessage(message) => Ok(message.delete().await?),
+            Message::LocalQueueMessage(_) => Ok(()),
+        }
+    }
+
+    pub fn parse<T>(&self, parser: impl FnOnce(&[u8]) -> Result<T>) -> Result<T> {
+        match self {
+            Message::QueueMessage(message) => message.parse(parser),
+            Message::LocalQueueMessage(message) => parser(&*message.data),
+        }
+    }
+
+    pub fn update_url(self, new_url: Url) -> Self {
+        match self {
+            Message::QueueMessage(message) => Message::QueueMessage(AzureQueueMessage {
+                messages_url: Some(new_url),
+                ..message
+            }),
+            m => m,
+        }
+    }
+
     pub fn id(&self) -> Uuid {
-        self.receipt.message_id
-    }
-
-    pub fn data(&self) -> &[u8] {
-        &self.data
-    }
-
-    fn parse(text: &str) -> Option<Self> {
-        let message_id = parse_message_id(text)?;
-        let pop_receipt = parse_pop_receipt(text)?;
-        let receipt = Receipt {
-            message_id,
-            pop_receipt,
-        };
-        let data = parse_data(text)?;
-
-        let msg = Self { receipt, data };
-
-        Some(msg)
-    }
-
-    pub fn get<'a, T: serde::de::Deserialize<'a>>(&'a self) -> Result<T> {
-        let data =
-            serde_json::from_slice(&self.data).context("get storage queue message failed")?;
-        Ok(data)
+        match self {
+            Message::QueueMessage(message) => message.message_id,
+            Message::LocalQueueMessage(_message) => Uuid::default(),
+        }
     }
 }
-
-fn is_empty_message(text: &str) -> bool {
-    regex::Regex::new(r"<QueueMessagesList>.*[\s\n\r]*</QueueMessagesList>")
-        .unwrap()
-        .is_match(&text)
-        || text.contains(r"<QueueMessagesList />")
-}
-
-fn parse_message_id(text: &str) -> Option<Uuid> {
-    let pat = r"<MessageId>(.*)</MessageId>";
-    let re = regex::Regex::new(pat).unwrap();
-
-    let msg_id_text = re.captures_iter(text).next()?.get(1)?.as_str();
-
-    Uuid::parse_str(msg_id_text).ok()
-}
-
-fn parse_pop_receipt(text: &str) -> Option<String> {
-    let pat = r"<PopReceipt>(.*)</PopReceipt>";
-    let re = regex::Regex::new(pat).unwrap();
-
-    let text = re.captures_iter(text).next()?.get(1)?.as_str().into();
-
-    Some(text)
-}
-
-fn parse_data(text: &str) -> Option<Vec<u8>> {
-    let pat = r"<MessageText>(.*)</MessageText>";
-    let re = regex::Regex::new(pat).unwrap();
-
-    let encoded = re.captures_iter(text).next()?.get(1)?.as_str();
-    let decoded = base64::decode(encoded).ok()?;
-
-    Some(decoded)
-}
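
Taken together, the new lib.rs gives callers one polling loop that works over either backend. A consumer sketch (a sketch under stated assumptions: tokio 0.2's delay_for and a hypothetical JSON task payload):

use anyhow::Result;
use serde::Deserialize;

#[derive(Deserialize)]
struct Task {
    id: u64,
}

async fn consume(queue: QueueClient) -> Result<()> {
    loop {
        match queue.pop().await? {
            Some(msg) => {
                // claim consumes the message (deleting it on the Azure side)
                let task: Task = msg.claim().await?;
                println!("handling task {}", task.id);
            }
            // back off when the queue is empty
            None => tokio::time::delay_for(EMPTY_QUEUE_DELAY).await,
        }
    }
}
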
diff --git a/src/agent/storage-queue/src/local_queue.rs b/src/agent/storage-queue/src/local_queue.rs
new file mode 100644
index 000000000..9698bedf6
--- /dev/null
+++ b/src/agent/storage-queue/src/local_queue.rs
@@ -0,0 +1,152 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use anyhow::{anyhow, Result};
+use backoff::{future::retry_notify, ExponentialBackoff};
+use queue_file::QueueFile;
+use serde::Serialize;
+use std::path::PathBuf;
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+pub const EMPTY_QUEUE_DELAY: Duration = Duration::from_secs(10);
+pub const SEND_RETRY_DELAY: Duration = Duration::from_millis(500);
+pub const RECEIVE_RETRY_DELAY: Duration = Duration::from_millis(500);
+pub const MAX_SEND_ATTEMPTS: i32 = 5;
+pub const MAX_RECEIVE_ATTEMPTS: i32 = 5;
+pub const MAX_ELAPSED_TIME: Duration = Duration::from_secs(2 * 60);
+
+pub struct LocalQueueMessage {
+    pub data: Vec<u8>,
+}
+
+impl std::fmt::Debug for LocalQueueMessage {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", std::str::from_utf8(&self.data).unwrap())
+    }
+}
+
+/// File backed queue
+#[derive(Debug, Clone)]
+pub struct FileQueueClient {
+    queue: Arc<Mutex<QueueFile>>,
+    pub path: PathBuf,
+}
+
+impl FileQueueClient {
+    pub fn new(queue_url: PathBuf) -> Result<Self> {
+        let queue = Arc::new(Mutex::new(
+            queue_file::QueueFile::open(queue_url.clone())
+                .map_err(|err| anyhow!("cannot open queue file {:?} : {}", queue_url, err))?,
+        ));
+
+        Ok(FileQueueClient {
+            queue,
+            path: queue_url,
+        })
+    }
+
+    pub async fn enqueue(&self, data: impl Serialize) -> Result<()> {
+        let send_data = || async {
+            let mut buffer = Vec::new();
+            serde_xml_rs::to_writer(&mut buffer, &data)
+                .map_err(|_| anyhow::anyhow!("unable to serialize"))?;
+            let mut locked_q = self
+                .queue
+                .lock()
+                .map_err(|_| anyhow::anyhow!("unable to acquire lock"))?;
+            locked_q
+                .add(buffer.as_slice())
+                .map_err(|_| anyhow::anyhow!("unable to queue message"))?;
+            Ok(())
+        };
+
+        let backoff = ExponentialBackoff {
+            current_interval: SEND_RETRY_DELAY,
+            initial_interval: SEND_RETRY_DELAY,
+            max_elapsed_time: Some(MAX_ELAPSED_TIME),
+            ..ExponentialBackoff::default()
+        };
+        let notify = |err, _| println!("IO error: {}", err);
+        retry_notify(backoff, send_data, notify).await?;
+
+        Ok(())
+    }
+
+    pub async fn pop(&self) -> Result<Option<LocalQueueMessage>> {
+        let receive_data = || async {
+            let mut locked_q = self
+                .queue
+                .lock()
+                .map_err(|_| anyhow::anyhow!("unable to acquire lock"))?;
+            let data = locked_q
+                .peek()
+                .map_err(|_| anyhow::anyhow!("unable to peek"))?;
+            locked_q
+                .remove()
+                .map_err(|_| anyhow::anyhow!("unable to pop message"))?;
+
+            let message = data.map(|d| LocalQueueMessage { data: d.to_vec() });
+            Ok(message)
+        };
+
+        let backoff = ExponentialBackoff {
+            current_interval: SEND_RETRY_DELAY,
+            initial_interval: SEND_RETRY_DELAY,
+            max_elapsed_time: Some(MAX_ELAPSED_TIME),
+            ..ExponentialBackoff::default()
+        };
+        let notify = |err, _| println!("IO error: {}", err);
+        let result = retry_notify(backoff, receive_data, notify).await?;
+
+        Ok(result)
+    }
+}
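
A sketch of driving the file-backed queue directly (a sketch under stated assumptions: the path is hypothetical, and the queue-file crate keeps entries in a single backing file so messages outlive the process):

use anyhow::Result;
use serde::Serialize;
use std::path::PathBuf;

#[derive(Serialize)]
struct Event {
    kind: String,
}

async fn file_queue_demo() -> Result<()> {
    let queue = FileQueueClient::new(PathBuf::from("/tmp/agent-events.q"))?;
    queue
        .enqueue(Event {
            kind: "heartbeat".into(),
        })
        .await?;
    if let Some(msg) = queue.pop().await? {
        println!("dequeued: {:?}", msg);
    }
    Ok(())
}
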
+
+use tokio::sync::mpsc::{
+    error::TryRecvError, unbounded_channel, UnboundedReceiver, UnboundedSender,
+};
+
+/// Queue based on mpsc channel
+#[derive(Debug, Clone)]
+pub struct ChannelQueueClient {
+    sender: Arc<Mutex<UnboundedSender<Vec<u8>>>>,
+    receiver: Arc<Mutex<UnboundedReceiver<Vec<u8>>>>,
+    pub url: reqwest::Url,
+}
+
+impl ChannelQueueClient {
+    pub fn new() -> Result<Self> {
+        let (sender, receiver) = unbounded_channel();
+        Ok(ChannelQueueClient {
+            sender: Arc::new(Mutex::new(sender)),
+            receiver: Arc::new(Mutex::new(receiver)),
+            url: reqwest::Url::parse("mpsc://channel")?,
+        })
+    }
+
+    pub async fn enqueue(&self, data: impl Serialize) -> Result<()> {
+        let sender = self
+            .sender
+            .lock()
+            .map_err(|_| anyhow::anyhow!("unable to acquire lock"))?;
+        let mut buffer = Vec::new();
+        serde_xml_rs::to_writer(&mut buffer, &data)
+            .map_err(|_| anyhow::anyhow!("unable to serialize"))?;
+        sender.send(buffer)?;
+        Ok(())
+    }
+
+    pub async fn pop(&self) -> Result<Option<LocalQueueMessage>> {
+        let mut receiver = self
+            .receiver
+            .lock()
+            .map_err(|_| anyhow::anyhow!("unable to acquire lock"))?;
+
+        match receiver.try_recv() {
+            Ok(data) => Ok(Some(LocalQueueMessage { data })),
+            Err(TryRecvError::Empty) => Ok(None),
+            Err(err) => Err(err.into()),
+        }
+    }
+}
diff --git a/src/proxy-manager/Cargo.lock b/src/proxy-manager/Cargo.lock
index 216d3f138..225f70df2 100644
--- a/src/proxy-manager/Cargo.lock
+++ b/src/proxy-manager/Cargo.lock
@@ -24,6 +24,12 @@ version = "1.0.38"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"

+[[package]]
+name = "anymap"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33954243bd79057c2de7338850b85983a44588021f8a5fee574a8888c6de4344"
+
 [[package]]
 name = "appinsights"
 version = "0.1.5"
@@ -31,7 +37,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5ae436410b1221062849ced02d4acd4193cbe2f27da551bc0053dfdf3a66edf6"
 dependencies = [
  "chrono",
- "crossbeam-channel",
+ "crossbeam-channel 0.4.4",
  "hostname",
  "http",
  "log",
@@ -191,15 +197,16 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"

 [[package]]
 name = "backoff"
-version = "0.2.1"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "721c249ab59cbc483ad4294c9ee2671835c1e43e9ffc277e6b4ecfef733cfdc5"
+checksum = "9fe17f59a06fe8b87a6fc8bf53bb70b3aba76d7685f432487a68cd5552853625"
 dependencies = [
  "async-std",
  "futures-core",
+ "getrandom 0.2.2",
  "instant",
- "pin-project 0.4.27",
- "rand 0.7.3",
+ "pin-project",
+ "rand 0.8.3",
 ]

 [[package]]
@@ -333,6 +340,40 @@ dependencies = [
  "maybe-uninit",
 ]

+[[package]]
+name = "crossbeam-channel"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
+dependencies = [
+ "cfg-if 1.0.0",
+ "crossbeam-utils 0.8.3",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
+dependencies = [
+ "cfg-if 1.0.0",
+ "crossbeam-epoch",
+ "crossbeam-utils 0.8.3",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12"
+dependencies = [
+ "cfg-if 1.0.0",
+ "crossbeam-utils 0.8.3",
+ "lazy_static",
+ "memoffset",
+ "scopeguard",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.7.2"
@@ -365,6 +406,29 @@ dependencies = [
  "syn 1.0.60",
 ]

+[[package]]
+name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.9", + "syn 1.0.60", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + [[package]] name = "encoding_rs" version = "0.8.28" @@ -402,6 +466,18 @@ dependencies = [ "instant", ] +[[package]] +name = "filetime" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall", + "winapi 0.3.9", +] + [[package]] name = "fnv" version = "1.0.7" @@ -433,6 +509,25 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fsevent" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97f347202c95c98805c216f9e1df210e8ebaec9fdb2365700a43c10797a35e63" +dependencies = [ + "bitflags", + "fsevent-sys", +] + +[[package]] +name = "fsevent-sys" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a29c77f1ca394c3e73a9a5d24cfcabb734682d9634fc398f2204a63c994120" +dependencies = [ + "libc", +] + [[package]] name = "fuchsia-zircon" version = "0.3.3" @@ -692,7 +787,7 @@ dependencies = [ "httparse", "httpdate", "itoa", - "pin-project 1.0.5", + "pin-project", "socket2", "tokio", "tower-service", @@ -734,6 +829,26 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "inotify" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d19f57db1baad9d09e43a3cd76dcf82ebdafd37d75c9498b87762dba77c93f15" +dependencies = [ + "bitflags", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "instant" version = "0.1.9" @@ -838,6 +953,15 @@ version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +[[package]] +name = "memoffset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" @@ -873,6 +997,19 @@ dependencies = [ "winapi 0.2.8", ] +[[package]] +name = "mio" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5dede4e2065b3842b8b0af444119f3aa331cc7cc2dd20388bfb0f5d5a38823a" +dependencies = [ + "libc", + "log", + "miow 0.3.6", + "ntapi", + "winapi 0.3.9", +] + [[package]] name = "mio-named-pipes" version = "0.1.7" @@ -880,7 +1017,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656" dependencies = [ "log", - "mio", + "mio 0.6.23", "miow 0.3.6", "winapi 0.3.9", ] @@ -893,7 +1030,7 @@ checksum = 
"afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" dependencies = [ "iovec", "libc", - "mio", + "mio 0.6.23", ] [[package]] @@ -957,6 +1094,34 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "notify" +version = "5.0.0-pre.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5fd82b93434edb9c00ae65ee741e0e081cdc8c63346ab9f687935a629aaf4c3" +dependencies = [ + "anymap", + "bitflags", + "crossbeam-channel 0.5.0", + "filetime", + "fsevent", + "fsevent-sys", + "inotify", + "libc", + "mio 0.7.9", + "walkdir", + "winapi 0.3.9", +] + +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi 0.3.9", +] + [[package]] name = "num-integer" version = "0.1.44" @@ -1086,33 +1251,13 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" -[[package]] -name = "pin-project" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffbc8e94b38ea3d2d8ba92aea2983b503cd75d0888d75b86bb37970b5698e15" -dependencies = [ - "pin-project-internal 0.4.27", -] - [[package]] name = "pin-project" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96fa8ebb90271c4477f144354485b8068bd8f6b78b428b01ba892ca26caf0b63" dependencies = [ - "pin-project-internal 1.0.5", -] - -[[package]] -name = "pin-project-internal" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ad2ae56b6abe3a1ee25f15ee605bacadb9a764edaba9c2bf4103800d4a1895" -dependencies = [ - "proc-macro2 1.0.24", - "quote 1.0.9", - "syn 1.0.60", + "pin-project-internal", ] [[package]] @@ -1199,6 +1344,16 @@ dependencies = [ "unicode-xid 0.2.1", ] +[[package]] +name = "queue-file" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82adc209678e4bb644900ccf43dc11e041a940e1f0c332a66985d01e02ca7451" +dependencies = [ + "bytes 0.5.6", + "snafu", +] + [[package]] name = "quote" version = "0.6.13" @@ -1298,6 +1453,31 @@ dependencies = [ "rand_core 0.6.2", ] +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel 0.5.0", + "crossbeam-deque", + "crossbeam-utils 0.8.3", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.2.5" @@ -1378,6 +1558,7 @@ dependencies = [ "async-trait", "backoff", "log", + "onefuzz-telemetry", "reqwest", ] @@ -1387,6 +1568,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.19" @@ -1397,6 
+1587,12 @@ dependencies = [
  "winapi 0.3.9",
 ]

+[[package]]
+name = "scopeguard"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+
 [[package]]
 name = "security-framework"
 version = "2.1.1"
@@ -1510,6 +1706,27 @@ dependencies = [
  "syn 0.15.44",
 ]

+[[package]]
+name = "snafu"
+version = "0.6.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7"
+dependencies = [
+ "doc-comment",
+ "snafu-derive",
+]
+
+[[package]]
+name = "snafu-derive"
+version = "0.6.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b"
+dependencies = [
+ "proc-macro2 1.0.24",
+ "quote 1.0.9",
+ "syn 1.0.60",
+]
+
 [[package]]
 name = "socket2"
 version = "0.3.19"
@@ -1527,7 +1744,11 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "async-trait",
+ "backoff",
  "base64",
+ "bytes 0.5.6",
+ "derivative",
+ "queue-file",
  "regex",
  "reqwest",
  "reqwest-retry",
@@ -1535,7 +1756,9 @@ dependencies = [
  "serde-xml-rs",
  "serde_derive",
  "serde_json",
+ "tokio",
  "uuid",
+ "yaque",
 ]

 [[package]]
@@ -1566,6 +1789,21 @@ dependencies = [
  "unicode-xid 0.2.1",
 ]

+[[package]]
+name = "sysinfo"
+version = "0.14.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2983daff11a197c7c406b130579bc362177aa54cf2cc1f34d6ac88fccaa6a5e1"
+dependencies = [
+ "cfg-if 0.1.10",
+ "doc-comment",
+ "libc",
+ "ntapi",
+ "once_cell",
+ "rayon",
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.2.0"
@@ -1665,7 +1903,7 @@ dependencies = [
  "lazy_static",
  "libc",
  "memchr",
- "mio",
+ "mio 0.6.23",
  "mio-named-pipes",
  "mio-uds",
  "num_cpus",
@@ -1744,7 +1982,7 @@ version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
 dependencies = [
- "pin-project 1.0.5",
+ "pin-project",
  "tracing",
 ]

@@ -1861,6 +2099,17 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"

+[[package]]
+name = "walkdir"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d"
+dependencies = [
+ "same-file",
+ "winapi 0.3.9",
+ "winapi-util",
+]
+
 [[package]]
 name = "want"
 version = "0.3.0"
@@ -2037,3 +2286,17 @@
 name = "xml-rs"
 version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
+
+[[package]]
+name = "yaque"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "543707de19373df21757dc231c46407701d0b05a8067542584ea5c6fa8602725"
+dependencies = [
+ "futures",
+ "lazy_static",
+ "log",
+ "notify",
+ "rand 0.7.3",
+ "sysinfo",
+]
diff --git a/src/proxy-manager/src/config.rs b/src/proxy-manager/src/config.rs
index 6edad1d10..e3899f37d 100644
--- a/src/proxy-manager/src/config.rs
+++ b/src/proxy-manager/src/config.rs
@@ -143,7 +143,7 @@ impl Config {
     pub async fn notify(&self) -> Result<()> {
         info!("notifying service of proxy update");
-        let client = QueueClient::new(self.data.notification.clone());
+        let client = QueueClient::new(self.data.notification.clone())?;
         client
             .enqueue(NotifyResponse {