mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-04-15 06:56:36 +00:00
Backoff on image download error
Change-type: patch Closes: #873 Signed-off-by: Cameron Diver <cameron@balena.io>
This commit is contained in:
parent
b606332312
commit
146267b402
@ -19,3 +19,5 @@ export class ResourceRecreationAttemptError extends TypedError {
|
||||
}
|
||||
|
||||
export class InvalidNetworkConfigurationError extends TypedError {}
|
||||
|
||||
/**
 * Thrown when an image fetch is retried before the backoff delay since the
 * last failed fetch of that image has elapsed.
 */
export class ImageDownloadBackoffError extends TypedError {}
|
||||
|
@ -15,6 +15,7 @@ import { DeltaStillProcessingError, NotFoundError } from '../lib/errors';
|
||||
import * as LogTypes from '../lib/log-types';
|
||||
import * as validation from '../lib/validation';
|
||||
import Logger from '../logger';
|
||||
import { ImageDownloadBackoffError } from './errors';
|
||||
|
||||
interface ImageEvents {
|
||||
change: void;
|
||||
@ -64,6 +65,10 @@ export class Images extends (EventEmitter as {
|
||||
private logger: Logger;
|
||||
private db: Database;
|
||||
|
||||
private imageFetchFailures: Dictionary<number> = {};
|
||||
private imageFetchLastFailureTime: Dictionary<
|
||||
ReturnType<typeof process.hrtime>
|
||||
> = {};
|
||||
private imageCleanupFailures: Dictionary<number> = {};
|
||||
// A store of volatile state for images (e.g. download progress), indexed by imageId
|
||||
private volatileState: { [imageId: number]: Image } = {};
|
||||
@ -81,6 +86,24 @@ export class Images extends (EventEmitter as {
|
||||
opts: FetchOptions,
|
||||
onFinish = _.noop,
|
||||
): Promise<null> {
|
||||
if (this.imageFetchFailures[image.name] != null) {
|
||||
// If we are retrying a pull within the backoff time of the last failure,
|
||||
// we need to throw an error, which will be caught in the device-state
|
||||
// engine, and ensure that we wait a bit longer
|
||||
const minDelay = Math.min(
|
||||
2 ** this.imageFetchFailures[image.name] * constants.backoffIncrement,
|
||||
constants.maxBackoffTime,
|
||||
);
|
||||
const timeSinceLastError = process.hrtime(
|
||||
this.imageFetchLastFailureTime[image.name],
|
||||
);
|
||||
const timeSinceLastErrorMs =
|
||||
timeSinceLastError[0] * 1000 + timeSinceLastError[1] / 1e6;
|
||||
if (timeSinceLastErrorMs < minDelay) {
|
||||
throw new ImageDownloadBackoffError();
|
||||
}
|
||||
}
|
||||
|
||||
const onProgress = (progress: FetchProgressEvent) => {
|
||||
// Only report the percentage if we haven't finished fetching
|
||||
if (this.volatileState[image.imageId] != null) {
|
||||
@ -108,6 +131,13 @@ export class Images extends (EventEmitter as {
|
||||
return null;
|
||||
} catch (e) {
|
||||
if (!NotFoundError(e)) {
|
||||
if (!(e instanceof ImageDownloadBackoffError)) {
|
||||
this.imageFetchLastFailureTime[image.name] = process.hrtime();
|
||||
this.imageFetchFailures[image.name] =
|
||||
this.imageFetchFailures[image.name] != null
|
||||
? this.imageFetchFailures[image.name] + 1
|
||||
: 1;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
this.reportChange(
|
||||
@ -130,6 +160,8 @@ export class Images extends (EventEmitter as {
|
||||
|
||||
this.logger.logSystemEvent(LogTypes.downloadImageSuccess, { image });
|
||||
success = true;
|
||||
delete this.imageFetchFailures[image.name];
|
||||
delete this.imageFetchLastFailureTime[image.name];
|
||||
} catch (err) {
|
||||
if (err instanceof DeltaStillProcessingError) {
|
||||
// If this is a delta image pull, and the delta still hasn't finished generating,
|
||||
|
@ -570,7 +570,7 @@ module.exports = class DeviceState extends EventEmitter
|
||||
if @scheduledApply?
|
||||
console.log("Updating failed, but there's another update scheduled immediately: ", err)
|
||||
else
|
||||
delay = Math.min((2 ** @failedUpdates) * 500, 30000)
|
||||
delay = Math.min((2 ** @failedUpdates) * constants.backoffIncrement, constants.maxBackoffTime)
|
||||
# If there was an error then schedule another attempt briefly in the future.
|
||||
console.log('Scheduling another update attempt due to failure: ', delay, err)
|
||||
@triggerApplyTarget({ force, delay, initial })
|
||||
|
@ -32,7 +32,7 @@ const constants = {
|
||||
proxyvisorHookReceiver: 'http://0.0.0.0:1337',
|
||||
configJsonNonAtomicPath: '/boot/config.json',
|
||||
defaultMixpanelToken: process.env.DEFAULT_MIXPANEL_TOKEN,
|
||||
supervisorNetworkInterface: supervisorNetworkInterface,
|
||||
supervisorNetworkInterface,
|
||||
allowedInterfaces: [
|
||||
'resin-vpn',
|
||||
'tun0',
|
||||
@ -50,6 +50,11 @@ const constants = {
|
||||
bootBlockDevice: '/dev/mmcblk0p1',
|
||||
hostConfigVarPrefix: 'HOST_',
|
||||
migrationBackupFile: 'backup.tgz',
|
||||
// Use this increment multiplied by 2**(number of failures) to increase
|
||||
// the backoff on subsequent failures
|
||||
backoffIncrement: 500,
|
||||
// The maximum time to backoff on repeated failure
|
||||
maxBackoffTime: 30000,
|
||||
};
|
||||
|
||||
if (process.env.DOCKER_HOST == null) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user