From e416ad0daf61fba14cd8c2012c5b2f66d8fb5f4a Mon Sep 17 00:00:00 2001 From: Felipe Lalanne Date: Fri, 25 Oct 2024 12:41:07 -0300 Subject: [PATCH] Add support for `io.balena.update.requires-reboot` This label can be used by user services to indicate that a reboot is required after the install of a service in order to fully apply an update. Change-type: minor --- src/compose/app.ts | 52 ++++++++--- src/compose/application-manager.ts | 13 +++ src/compose/composition-steps.ts | 4 + src/compose/service-manager.ts | 31 +++++-- src/compose/service.ts | 1 - src/compose/types/app.ts | 2 + src/compose/types/composition-step.ts | 1 + src/device-state/device-config.ts | 20 ++++- src/device-state/index.ts | 2 +- src/lib/fs-utils.ts | 3 +- src/lib/reboot.ts | 12 ++- test/unit/compose/app.spec.ts | 124 ++++++++++++++++++++++++++ 12 files changed, 238 insertions(+), 27 deletions(-) diff --git a/src/compose/app.ts b/src/compose/app.ts index d62fae3f..98bc19e6 100644 --- a/src/compose/app.ts +++ b/src/compose/app.ts @@ -654,6 +654,7 @@ class AppImpl implements App { context.targetApp, needsDownload, servicesLocked, + context.rebootBreadcrumbSet, context.appsToLock, context.availableImages, context.networkPairs, @@ -682,6 +683,8 @@ class AppImpl implements App { context.appsToLock, context.targetApp.services, servicesLocked, + context.rebootBreadcrumbSet, + context.bootTime, ); } @@ -761,6 +764,8 @@ class AppImpl implements App { appsToLock: AppsToLockMap, targetServices: Service[], servicesLocked: boolean, + rebootBreadcrumbSet: boolean, + bootTime: Date, ): CompositionStep[] { // Update container metadata if service release has changed if (current.commit !== target.commit) { @@ -774,16 +779,38 @@ class AppImpl implements App { return []; } } else if (target.config.running !== current.config.running) { - // Take lock for all services before starting/stopping container - if (!servicesLocked) { - this.services.concat(targetServices).forEach((s) => { - appsToLock[target.appId].add(s.serviceName); - }); - return []; - } if (target.config.running) { + // if the container has a reboot + // required label and the boot time is before the creation time, then + // return a 'noop' to ensure a reboot happens before starting the container + const requiresReboot = + checkTruthy( + target.config.labels?.['io.balena.update.requires-reboot'], + ) && + current.createdAt != null && + current.createdAt > bootTime; + + if (requiresReboot && rebootBreadcrumbSet) { + // Do not return a noop to allow locks to be released by the + // app module + return []; + } else if (requiresReboot) { + return [ + generateStep('requireReboot', { + serviceName: target.serviceName, + }), + ]; + } + return [generateStep('start', { target })]; } else { + // Take lock for all services before stopping container + if (!servicesLocked) { + this.services.concat(targetServices).forEach((s) => { + appsToLock[target.appId].add(s.serviceName); + }); + return []; + } return [generateStep('stop', { current })]; } } else { @@ -796,6 +823,7 @@ class AppImpl implements App { targetApp: App, needsDownload: boolean, servicesLocked: boolean, + rebootBreadcrumbSet: boolean, appsToLock: AppsToLockMap, availableImages: UpdateState['availableImages'], networkPairs: Array>, @@ -832,8 +860,10 @@ class AppImpl implements App { } return [generateStep('start', { target })]; } else { - // Wait for dependencies to be started - return [generateStep('noop', {})]; + // Wait for dependencies to be started unless there is a + // reboot breadcrumb set, in which case we need to allow the state + // to settle for the reboot to happen + return rebootBreadcrumbSet ? [] : [generateStep('noop', {})]; } } else { return []; @@ -897,11 +927,11 @@ class AppImpl implements App { return false; } - const depedencyUnmet = _.some(target.dependsOn, (dep) => + const dependencyUnmet = _.some(target.dependsOn, (dep) => _.some(servicePairs, (pair) => pair.target?.serviceName === dep), ); - if (depedencyUnmet) { + if (dependencyUnmet) { return false; } diff --git a/src/compose/application-manager.ts b/src/compose/application-manager.ts index cdec7b0e..ca2f777c 100644 --- a/src/compose/application-manager.ts +++ b/src/compose/application-manager.ts @@ -40,6 +40,8 @@ import type { Image, InstancedAppState, } from './types'; +import { isRebootBreadcrumbSet } from '../lib/reboot'; +import { getBootTime } from '../lib/fs-utils'; type ApplicationManagerEventEmitter = StrictEventEmitter< EventEmitter, @@ -127,6 +129,7 @@ export async function getRequiredSteps( config.getMany(['localMode', 'delta']), ]); const containerIdsByAppId = getAppContainerIds(currentApps); + const rebootBreadcrumbSet = await isRebootBreadcrumbSet(); // Local mode sets the image and volume retention only // if not explicitely set by the caller @@ -149,6 +152,7 @@ export async function getRequiredSteps( availableImages, containerIdsByAppId, appLocks: lockRegistry, + rebootBreadcrumbSet, }); } @@ -161,6 +165,7 @@ interface InferNextOpts { availableImages: UpdateState['availableImages']; containerIdsByAppId: { [appId: number]: UpdateState['containerIds'] }; appLocks: LockRegistry; + rebootBreadcrumbSet: boolean; } // Calculate the required steps from the current to the target state @@ -176,6 +181,7 @@ export async function inferNextSteps( availableImages = [], containerIdsByAppId = {}, appLocks = {}, + rebootBreadcrumbSet = false, }: Partial, ) { const currentAppIds = Object.keys(currentApps).map((i) => parseInt(i, 10)); @@ -184,6 +190,7 @@ export async function inferNextSteps( const withLeftoverLocks = await Promise.all( currentAppIds.map((id) => hasLeftoverLocks(id)), ); + const bootTime = getBootTime(); let steps: CompositionStep[] = []; @@ -245,6 +252,8 @@ export async function inferNextSteps( force, lock: appLocks[id], hasLeftoverLocks: withLeftoverLocks[id], + rebootBreadcrumbSet, + bootTime, }, targetApps[id], ), @@ -261,6 +270,8 @@ export async function inferNextSteps( force, lock: appLocks[id], hasLeftoverLocks: withLeftoverLocks[id], + rebootBreadcrumbSet, + bootTime, }), ); } @@ -287,6 +298,8 @@ export async function inferNextSteps( force, lock: appLocks[id], hasLeftoverLocks: false, + rebootBreadcrumbSet, + bootTime, }, targetApps[id], ), diff --git a/src/compose/composition-steps.ts b/src/compose/composition-steps.ts index 00b3e747..d0309626 100644 --- a/src/compose/composition-steps.ts +++ b/src/compose/composition-steps.ts @@ -6,6 +6,7 @@ import * as networkManager from './network-manager'; import * as volumeManager from './volume-manager'; import * as commitStore from './commit'; import { Lockable, cleanLocksForApp } from '../lib/update-lock'; +import { setRebootBreadcrumb } from '../lib/reboot'; import type { DeviceLegacyReport } from '../types/state'; import type { CompositionStepAction, CompositionStepT } from './types'; import type { Lock } from '../lib/update-lock'; @@ -157,6 +158,9 @@ export function getExecutors(app: { callbacks: CompositionCallbacks }) { // Clean up any remaining locks await cleanLocksForApp(step.appId); }, + requireReboot: async (step) => { + await setRebootBreadcrumb({ serviceName: step.serviceName }); + }, }; return executors; diff --git a/src/compose/service-manager.ts b/src/compose/service-manager.ts index e05a4492..54c71df2 100644 --- a/src/compose/service-manager.ts +++ b/src/compose/service-manager.ts @@ -19,7 +19,7 @@ import { isStatusError, } from '../lib/errors'; import * as LogTypes from '../lib/log-types'; -import { checkInt, isValidDeviceName } from '../lib/validation'; +import { checkInt, isValidDeviceName, checkTruthy } from '../lib/validation'; import { Service } from './service'; import type { ServiceStatus } from './types'; import { serviceNetworksToDockerNetworks } from './utils'; @@ -27,6 +27,7 @@ import { serviceNetworksToDockerNetworks } from './utils'; import log from '../lib/supervisor-console'; import logMonitor from '../logging/monitor'; import { setTimeout } from 'timers/promises'; +import { getBootTime } from '../lib/fs-utils'; interface ServiceManagerEvents { change: void; @@ -233,7 +234,7 @@ export async function remove(service: Service) { } } -async function create(service: Service) { +async function create(service: Service): Promise { const mockContainerId = config.newUniqueKey(); try { const existing = await get(service); @@ -242,7 +243,7 @@ async function create(service: Service) { `No containerId provided for service ${service.serviceName} in ServiceManager.updateMetadata. Service: ${service}`, ); } - return docker.getContainer(existing.containerId); + return existing; } catch (e: unknown) { if (!isNotFoundError(e)) { logger.logSystemEvent(LogTypes.installServiceError, { @@ -287,7 +288,9 @@ async function create(service: Service) { reportNewStatus(mockContainerId, service, 'Installing'); const container = await docker.createContainer(conf); - service.containerId = container.id; + const inspectInfo = await container.inspect(); + + service = Service.fromDockerContainer(inspectInfo); await Promise.all( _.map((nets || {}).EndpointsConfig, (endpointConfig, name) => @@ -299,7 +302,7 @@ async function create(service: Service) { ); logger.logSystemEvent(LogTypes.installServiceSuccess, { service }); - return container; + return service; } finally { reportChange(mockContainerId); } @@ -310,13 +313,25 @@ export async function start(service: Service) { let containerId: string | null = null; try { - const container = await create(service); + const svc = await create(service); + const container = docker.getContainer(svc.containerId!); + + const requiresReboot = + checkTruthy( + service.config.labels?.['io.balena.update.requires-reboot'], + ) && + svc.createdAt != null && + svc.createdAt > getBootTime(); + + if (requiresReboot) { + log.warn(`Skipping start of service ${svc.serviceName} until reboot`); + } // Exit here if the target state of the service - // is set to running: false + // is set to running: false or we are waiting for a reboot // QUESTION: should we split the service steps into // 'install' and 'start' instead of doing this? - if (service.config.running === false) { + if (service.config.running === false || requiresReboot) { return container; } diff --git a/src/compose/service.ts b/src/compose/service.ts index e0f49546..a0bdde68 100644 --- a/src/compose/service.ts +++ b/src/compose/service.ts @@ -128,7 +128,6 @@ class ServiceImpl implements Service { service.releaseId = parseInt(appConfig.releaseId, 10); service.serviceId = parseInt(appConfig.serviceId, 10); service.imageName = appConfig.image; - service.createdAt = appConfig.createdAt; service.commit = appConfig.commit; service.appUuid = appConfig.appUuid; diff --git a/src/compose/types/app.ts b/src/compose/types/app.ts index 9984ab6d..4049871c 100644 --- a/src/compose/types/app.ts +++ b/src/compose/types/app.ts @@ -12,6 +12,8 @@ export interface UpdateState { hasLeftoverLocks: boolean; lock: Lock | null; force: boolean; + rebootBreadcrumbSet: boolean; + bootTime: Date; } export interface App { diff --git a/src/compose/types/composition-step.ts b/src/compose/types/composition-step.ts index d27f54c5..13e937e6 100644 --- a/src/compose/types/composition-step.ts +++ b/src/compose/types/composition-step.ts @@ -76,6 +76,7 @@ export interface CompositionStepArgs { appId: string | number; lock: Lock | null; }; + requireReboot: { serviceName: string }; } export type CompositionStepAction = keyof CompositionStepArgs; diff --git a/src/device-state/device-config.ts b/src/device-state/device-config.ts index 79c83e15..f98fd575 100644 --- a/src/device-state/device-config.ts +++ b/src/device-state/device-config.ts @@ -104,7 +104,13 @@ const actionExecutors: DeviceActionExecutors = { await setBootConfig(backend, step.target as Dictionary); } }, - setRebootBreadcrumb, + setRebootBreadcrumb: async (step) => { + const changes = + step != null && step.target != null && typeof step.target === 'object' + ? step.target + : {}; + return setRebootBreadcrumb(changes); + }, }; const configBackends: ConfigBackend[] = []; @@ -396,6 +402,7 @@ function getConfigSteps( target: Dictionary, ) { const configChanges: Dictionary = {}; + const rebootingChanges: Dictionary = {}; const humanReadableConfigChanges: Dictionary = {}; let reboot = false; const steps: ConfigStep[] = []; @@ -431,6 +438,9 @@ function getConfigSteps( } if (changingValue != null) { configChanges[key] = changingValue; + if ($rebootRequired) { + rebootingChanges[key] = changingValue; + } humanReadableConfigChanges[envVarName] = changingValue; reboot = $rebootRequired || reboot; } @@ -440,7 +450,7 @@ function getConfigSteps( if (!_.isEmpty(configChanges)) { if (reboot) { - steps.push({ action: 'setRebootBreadcrumb' }); + steps.push({ action: 'setRebootBreadcrumb', target: rebootingChanges }); } steps.push({ @@ -527,7 +537,11 @@ async function getBackendSteps( return [ // All backend steps require a reboot except fan control ...(steps.length > 0 && rebootRequired - ? [{ action: 'setRebootBreadcrumb' } as ConfigStep] + ? [ + { + action: 'setRebootBreadcrumb', + } as ConfigStep, + ] : []), ...steps, ]; diff --git a/src/device-state/index.ts b/src/device-state/index.ts index 949354c9..14a86fe4 100644 --- a/src/device-state/index.ts +++ b/src/device-state/index.ts @@ -657,7 +657,7 @@ export const applyTarget = async ({ // For application manager, the reboot breadcrumb should // be set after all downloads are ready and target containers // have been installed - if (_.every(steps, ({ action }) => action === 'noop') && rebootRequired) { + if (steps.every(({ action }) => action === 'noop') && rebootRequired) { steps.push({ action: 'reboot', }); diff --git a/src/lib/fs-utils.ts b/src/lib/fs-utils.ts index 484b6434..303b94ca 100644 --- a/src/lib/fs-utils.ts +++ b/src/lib/fs-utils.ts @@ -87,5 +87,4 @@ export const touch = (file: string, time = new Date()) => ); // Get the system boot time as a Date object -export const getBootTime = () => - new Date(new Date().getTime() - uptime() * 1000); +export const getBootTime = () => new Date(Date.now() - uptime() * 1000); diff --git a/src/lib/reboot.ts b/src/lib/reboot.ts index 21d77b87..62239d3c 100644 --- a/src/lib/reboot.ts +++ b/src/lib/reboot.ts @@ -1,6 +1,7 @@ import { pathOnRoot } from '../lib/host-utils'; import * as fsUtils from '../lib/fs-utils'; import { promises as fs } from 'fs'; +import * as logger from '../logging'; // This indicates the file on the host /tmp directory that // marks the need for a reboot. Since reboot is only triggered for now @@ -11,10 +12,19 @@ const REBOOT_BREADCRUMB = pathOnRoot( '/tmp/balena-supervisor/reboot-after-apply', ); -export async function setRebootBreadcrumb() { +export async function setRebootBreadcrumb(source: Dictionary = {}) { // Just create the file. The last step in the target state calculation will check // the file and create a reboot step await fsUtils.touch(REBOOT_BREADCRUMB); + logger.logSystemMessage( + `Reboot has been scheduled to apply changes: ${JSON.stringify(source)}`, + {}, + 'Reboot scheduled', + ); +} + +export async function isRebootBreadcrumbSet() { + return await fsUtils.exists(REBOOT_BREADCRUMB); } export async function isRebootRequired() { diff --git a/test/unit/compose/app.spec.ts b/test/unit/compose/app.spec.ts index e3718170..42e1baa3 100644 --- a/test/unit/compose/app.spec.ts +++ b/test/unit/compose/app.spec.ts @@ -21,6 +21,8 @@ const defaultContext = { downloading: [] as string[], lock: null, hasLeftoverLocks: false, + rebootBreadcrumbSet: false, + bootTime: new Date(Date.now() - 30 * 60 * 1000), // 30 minutes ago }; const mockLock: Lock = { @@ -2111,6 +2113,128 @@ describe('compose/app', () => { ); expectSteps('start', steps3, 2); }); + + it('should set the reboot breadcrumb after a service with `requires-reboot` has been installed', async () => { + // Container is a "run once" type of service so it has exitted. + const current = createApp({ + services: [ + await createService( + { + labels: { 'io.balena.update.requires-reboot': 'true' }, + running: false, + }, + { state: { createdAt: new Date(), status: 'Installed' } }, + ), + ], + networks: [DEFAULT_NETWORK], + }); + + // Now test that another start step is not added on this service + const target = createApp({ + services: [ + await createService({ + labels: { 'io.balena.update.requires-reboot': 'true' }, + running: true, + }), + ], + isTarget: true, + }); + + const steps = current.nextStepsForAppUpdate( + { + ...defaultContext, + rebootBreadcrumbSet: false, + // 30 minutes ago + bootTime: new Date(Date.now() - 30 * 60 * 1000), + }, + target, + ); + expect(steps.length).to.equal(1); + expectSteps('requireReboot', steps); + }); + + it('should not try to start a container with `requires-reboot` if the reboot has not taken place yet', async () => { + // Container is a "run once" type of service so it has exitted. + const current = createApp({ + services: [ + await createService( + { + labels: { 'io.balena.update.requires-reboot': 'true' }, + running: false, + }, + { state: { createdAt: new Date(), status: 'Installed' } }, + ), + ], + networks: [DEFAULT_NETWORK], + }); + + // Now test that another start step is not added on this service + const target = createApp({ + services: [ + await createService({ + labels: { 'io.balena.update.requires-reboot': 'true' }, + running: true, + }), + ], + isTarget: true, + }); + + const steps = current.nextStepsForAppUpdate( + { + ...defaultContext, + rebootBreadcrumbSet: true, + bootTime: new Date(Date.now() - 30 * 60 * 1000), + }, + target, + ); + expect(steps.length).to.equal(0); + expectNoStep('start', steps); + }); + + it('should start a container with `requires-reboot` after reboot has taken place', async () => { + // Container is a "run once" type of service so it has exitted. + const current = createApp({ + services: [ + await createService( + { + labels: { 'io.balena.update.requires-reboot': 'true' }, + running: false, + }, + // Container was created 5 minutes ago + { + state: { + createdAt: new Date(Date.now() - 5 * 60 * 1000), + status: 'Installed', + }, + }, + ), + ], + networks: [DEFAULT_NETWORK], + }); + + // Now test that another start step is not added on this service + const target = createApp({ + services: [ + await createService({ + labels: { 'io.balena.update.requires-reboot': 'true' }, + running: true, + }), + ], + isTarget: true, + }); + + const steps = current.nextStepsForAppUpdate( + { + ...defaultContext, + rebootBreadcrumbSet: true, + // Reboot just happened + bootTime: new Date(), + }, + target, + ); + expect(steps.length).to.equal(1); + expectSteps('start', steps); + }); }); describe('image state behavior', () => {