Shorten the expiry window for the work queue SAS URLs assigned at node registration (#416)

The underlying impact is that nodes must re-register on a more frequent basis.

Nodes find out they are out-of-date during registration and immediately prior to starting a new set of work. Requiring nodes to re-register on a shortened cycle provides more opportunities for nodes to get re-imaged.

Additionally, this addresses an issue by handling the SAS URL expiry more cleanly in the supervisor.
This commit is contained in:
bmc-msft
2021-01-07 07:34:26 -05:00
committed by GitHub
parent 3b26ffef65
commit e799eb03cd
5 changed files with 26 additions and 18 deletions

View File

@ -2,8 +2,10 @@
// Licensed under the MIT License.
use anyhow::Result;
use onefuzz::{http::ResponseExt, jitter::delay_with_jitter};
use reqwest::StatusCode;
use onefuzz::{
http::{is_auth_error_code, ResponseExt},
jitter::delay_with_jitter,
};
use reqwest_retry::SendRetry;
use std::{
path::{Path, PathBuf},
@ -228,7 +230,7 @@ impl Registration {
machine_id,
});
}
Err(err) if status_code == StatusCode::UNAUTHORIZED => {
Err(err) if is_auth_error_code(status_code) => {
warn!(
"Registration failed: {}\n retrying in {} seconds",
err,
@ -264,6 +266,7 @@ impl Registration {
}
pub async fn renew(&mut self) -> Result<()> {
info!("renewing registration");
let token = self.config.credentials.access_token().await?;
let machine_id = self.machine_id.to_string();

View File

@ -6,7 +6,7 @@ use std::path::PathBuf;
use anyhow::Result;
use downcast_rs::Downcast;
use onefuzz::blob::BlobContainerUrl;
use onefuzz::{blob::BlobContainerUrl, http::is_auth_error};
use storage_queue::QueueClient;
use tokio::fs;
use uuid::Uuid;
@ -189,17 +189,5 @@ impl WorkQueue {
}
}
fn is_auth_error(err: &anyhow::Error) -> bool {
use reqwest::StatusCode;
if let Some(err) = err.downcast_ref::<reqwest::Error>() {
if let Some(status) = err.status() {
return status == StatusCode::UNAUTHORIZED;
}
}
false
}
#[cfg(test)]
pub mod double;

View File

@ -3,7 +3,7 @@
use anyhow::{bail, Result};
use async_trait::async_trait;
use reqwest::Response;
use reqwest::{Response, StatusCode};
#[async_trait]
pub trait ResponseExt: Sized {
@ -31,3 +31,17 @@ impl ResponseExt for Response {
Ok(self)
}
}
pub fn is_auth_error(err: &anyhow::Error) -> bool {
if let Some(err) = err.downcast_ref::<reqwest::Error>() {
if let Some(status) = err.status() {
return is_auth_error_code(status);
}
}
false
}
/// Returns `true` when `status` is `401 Unauthorized` or `403 Forbidden`.
pub fn is_auth_error_code(status: StatusCode) -> bool {
    matches!(status, StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN)
}

View File

@ -3,6 +3,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import datetime
import logging
from uuid import UUID
@ -30,6 +31,7 @@ def create_registration_response(machine_id: UUID, pool: Pool) -> func.HttpRespo
read=True,
update=True,
process=True,
duration=datetime.timedelta(hours=24),
)
return ok(
AgentRegistration(

View File

@ -48,11 +48,12 @@ def get_queue_sas(
add: bool = False,
update: bool = False,
process: bool = False,
duration: datetime.timedelta = datetime.timedelta(days=30),
) -> str:
account_id = get_primary_account(storage_type)
logging.debug("getting queue sas %s (account_id: %s)", queue, account_id)
name, key = get_storage_account_name_key(account_id)
expiry = datetime.datetime.utcnow() + datetime.timedelta(days=30)
expiry = datetime.datetime.utcnow() + duration
token = generate_queue_sas(
name,