Add support for io.balena.update.requires-reboot

This label can be used by user services to indicate that a reboot is
required after the install of a service in order to fully apply an update.

Change-type: minor
This commit is contained in:
Felipe Lalanne 2024-10-25 12:41:07 -03:00
parent 75127c6074
commit e416ad0daf
No known key found for this signature in database
GPG Key ID: 03E696BFD472B26A
12 changed files with 238 additions and 27 deletions

View File

@ -654,6 +654,7 @@ class AppImpl implements App {
context.targetApp,
needsDownload,
servicesLocked,
context.rebootBreadcrumbSet,
context.appsToLock,
context.availableImages,
context.networkPairs,
@ -682,6 +683,8 @@ class AppImpl implements App {
context.appsToLock,
context.targetApp.services,
servicesLocked,
context.rebootBreadcrumbSet,
context.bootTime,
);
}
@ -761,6 +764,8 @@ class AppImpl implements App {
appsToLock: AppsToLockMap,
targetServices: Service[],
servicesLocked: boolean,
rebootBreadcrumbSet: boolean,
bootTime: Date,
): CompositionStep[] {
// Update container metadata if service release has changed
if (current.commit !== target.commit) {
@ -774,16 +779,38 @@ class AppImpl implements App {
return [];
}
} else if (target.config.running !== current.config.running) {
// Take lock for all services before starting/stopping container
if (!servicesLocked) {
this.services.concat(targetServices).forEach((s) => {
appsToLock[target.appId].add(s.serviceName);
});
return [];
}
if (target.config.running) {
// if the container has a reboot
// required label and the boot time is before the creation time, then
// return a 'noop' to ensure a reboot happens before starting the container
const requiresReboot =
checkTruthy(
target.config.labels?.['io.balena.update.requires-reboot'],
) &&
current.createdAt != null &&
current.createdAt > bootTime;
if (requiresReboot && rebootBreadcrumbSet) {
// Do not return a noop to allow locks to be released by the
// app module
return [];
} else if (requiresReboot) {
return [
generateStep('requireReboot', {
serviceName: target.serviceName,
}),
];
}
return [generateStep('start', { target })];
} else {
// Take lock for all services before stopping container
if (!servicesLocked) {
this.services.concat(targetServices).forEach((s) => {
appsToLock[target.appId].add(s.serviceName);
});
return [];
}
return [generateStep('stop', { current })];
}
} else {
@ -796,6 +823,7 @@ class AppImpl implements App {
targetApp: App,
needsDownload: boolean,
servicesLocked: boolean,
rebootBreadcrumbSet: boolean,
appsToLock: AppsToLockMap,
availableImages: UpdateState['availableImages'],
networkPairs: Array<ChangingPair<Network>>,
@ -832,8 +860,10 @@ class AppImpl implements App {
}
return [generateStep('start', { target })];
} else {
// Wait for dependencies to be started
return [generateStep('noop', {})];
// Wait for dependencies to be started unless there is a
// reboot breadcrumb set, in which case we need to allow the state
// to settle for the reboot to happen
return rebootBreadcrumbSet ? [] : [generateStep('noop', {})];
}
} else {
return [];
@ -897,11 +927,11 @@ class AppImpl implements App {
return false;
}
const depedencyUnmet = _.some(target.dependsOn, (dep) =>
const dependencyUnmet = _.some(target.dependsOn, (dep) =>
_.some(servicePairs, (pair) => pair.target?.serviceName === dep),
);
if (depedencyUnmet) {
if (dependencyUnmet) {
return false;
}

View File

@ -40,6 +40,8 @@ import type {
Image,
InstancedAppState,
} from './types';
import { isRebootBreadcrumbSet } from '../lib/reboot';
import { getBootTime } from '../lib/fs-utils';
type ApplicationManagerEventEmitter = StrictEventEmitter<
EventEmitter,
@ -127,6 +129,7 @@ export async function getRequiredSteps(
config.getMany(['localMode', 'delta']),
]);
const containerIdsByAppId = getAppContainerIds(currentApps);
const rebootBreadcrumbSet = await isRebootBreadcrumbSet();
// Local mode sets the image and volume retention only
// if not explicitely set by the caller
@ -149,6 +152,7 @@ export async function getRequiredSteps(
availableImages,
containerIdsByAppId,
appLocks: lockRegistry,
rebootBreadcrumbSet,
});
}
@ -161,6 +165,7 @@ interface InferNextOpts {
availableImages: UpdateState['availableImages'];
containerIdsByAppId: { [appId: number]: UpdateState['containerIds'] };
appLocks: LockRegistry;
rebootBreadcrumbSet: boolean;
}
// Calculate the required steps from the current to the target state
@ -176,6 +181,7 @@ export async function inferNextSteps(
availableImages = [],
containerIdsByAppId = {},
appLocks = {},
rebootBreadcrumbSet = false,
}: Partial<InferNextOpts>,
) {
const currentAppIds = Object.keys(currentApps).map((i) => parseInt(i, 10));
@ -184,6 +190,7 @@ export async function inferNextSteps(
const withLeftoverLocks = await Promise.all(
currentAppIds.map((id) => hasLeftoverLocks(id)),
);
const bootTime = getBootTime();
let steps: CompositionStep[] = [];
@ -245,6 +252,8 @@ export async function inferNextSteps(
force,
lock: appLocks[id],
hasLeftoverLocks: withLeftoverLocks[id],
rebootBreadcrumbSet,
bootTime,
},
targetApps[id],
),
@ -261,6 +270,8 @@ export async function inferNextSteps(
force,
lock: appLocks[id],
hasLeftoverLocks: withLeftoverLocks[id],
rebootBreadcrumbSet,
bootTime,
}),
);
}
@ -287,6 +298,8 @@ export async function inferNextSteps(
force,
lock: appLocks[id],
hasLeftoverLocks: false,
rebootBreadcrumbSet,
bootTime,
},
targetApps[id],
),

View File

@ -6,6 +6,7 @@ import * as networkManager from './network-manager';
import * as volumeManager from './volume-manager';
import * as commitStore from './commit';
import { Lockable, cleanLocksForApp } from '../lib/update-lock';
import { setRebootBreadcrumb } from '../lib/reboot';
import type { DeviceLegacyReport } from '../types/state';
import type { CompositionStepAction, CompositionStepT } from './types';
import type { Lock } from '../lib/update-lock';
@ -157,6 +158,9 @@ export function getExecutors(app: { callbacks: CompositionCallbacks }) {
// Clean up any remaining locks
await cleanLocksForApp(step.appId);
},
requireReboot: async (step) => {
await setRebootBreadcrumb({ serviceName: step.serviceName });
},
};
return executors;

View File

@ -19,7 +19,7 @@ import {
isStatusError,
} from '../lib/errors';
import * as LogTypes from '../lib/log-types';
import { checkInt, isValidDeviceName } from '../lib/validation';
import { checkInt, isValidDeviceName, checkTruthy } from '../lib/validation';
import { Service } from './service';
import type { ServiceStatus } from './types';
import { serviceNetworksToDockerNetworks } from './utils';
@ -27,6 +27,7 @@ import { serviceNetworksToDockerNetworks } from './utils';
import log from '../lib/supervisor-console';
import logMonitor from '../logging/monitor';
import { setTimeout } from 'timers/promises';
import { getBootTime } from '../lib/fs-utils';
interface ServiceManagerEvents {
change: void;
@ -233,7 +234,7 @@ export async function remove(service: Service) {
}
}
async function create(service: Service) {
async function create(service: Service): Promise<Service> {
const mockContainerId = config.newUniqueKey();
try {
const existing = await get(service);
@ -242,7 +243,7 @@ async function create(service: Service) {
`No containerId provided for service ${service.serviceName} in ServiceManager.updateMetadata. Service: ${service}`,
);
}
return docker.getContainer(existing.containerId);
return existing;
} catch (e: unknown) {
if (!isNotFoundError(e)) {
logger.logSystemEvent(LogTypes.installServiceError, {
@ -287,7 +288,9 @@ async function create(service: Service) {
reportNewStatus(mockContainerId, service, 'Installing');
const container = await docker.createContainer(conf);
service.containerId = container.id;
const inspectInfo = await container.inspect();
service = Service.fromDockerContainer(inspectInfo);
await Promise.all(
_.map((nets || {}).EndpointsConfig, (endpointConfig, name) =>
@ -299,7 +302,7 @@ async function create(service: Service) {
);
logger.logSystemEvent(LogTypes.installServiceSuccess, { service });
return container;
return service;
} finally {
reportChange(mockContainerId);
}
@ -310,13 +313,25 @@ export async function start(service: Service) {
let containerId: string | null = null;
try {
const container = await create(service);
const svc = await create(service);
const container = docker.getContainer(svc.containerId!);
const requiresReboot =
checkTruthy(
service.config.labels?.['io.balena.update.requires-reboot'],
) &&
svc.createdAt != null &&
svc.createdAt > getBootTime();
if (requiresReboot) {
log.warn(`Skipping start of service ${svc.serviceName} until reboot`);
}
// Exit here if the target state of the service
// is set to running: false
// is set to running: false or we are waiting for a reboot
// QUESTION: should we split the service steps into
// 'install' and 'start' instead of doing this?
if (service.config.running === false) {
if (service.config.running === false || requiresReboot) {
return container;
}

View File

@ -128,7 +128,6 @@ class ServiceImpl implements Service {
service.releaseId = parseInt(appConfig.releaseId, 10);
service.serviceId = parseInt(appConfig.serviceId, 10);
service.imageName = appConfig.image;
service.createdAt = appConfig.createdAt;
service.commit = appConfig.commit;
service.appUuid = appConfig.appUuid;

View File

@ -12,6 +12,8 @@ export interface UpdateState {
hasLeftoverLocks: boolean;
lock: Lock | null;
force: boolean;
rebootBreadcrumbSet: boolean;
bootTime: Date;
}
export interface App {

View File

@ -76,6 +76,7 @@ export interface CompositionStepArgs {
appId: string | number;
lock: Lock | null;
};
requireReboot: { serviceName: string };
}
export type CompositionStepAction = keyof CompositionStepArgs;

View File

@ -104,7 +104,13 @@ const actionExecutors: DeviceActionExecutors = {
await setBootConfig(backend, step.target as Dictionary<string>);
}
},
setRebootBreadcrumb,
setRebootBreadcrumb: async (step) => {
const changes =
step != null && step.target != null && typeof step.target === 'object'
? step.target
: {};
return setRebootBreadcrumb(changes);
},
};
const configBackends: ConfigBackend[] = [];
@ -396,6 +402,7 @@ function getConfigSteps(
target: Dictionary<string>,
) {
const configChanges: Dictionary<string> = {};
const rebootingChanges: Dictionary<string> = {};
const humanReadableConfigChanges: Dictionary<string> = {};
let reboot = false;
const steps: ConfigStep[] = [];
@ -431,6 +438,9 @@ function getConfigSteps(
}
if (changingValue != null) {
configChanges[key] = changingValue;
if ($rebootRequired) {
rebootingChanges[key] = changingValue;
}
humanReadableConfigChanges[envVarName] = changingValue;
reboot = $rebootRequired || reboot;
}
@ -440,7 +450,7 @@ function getConfigSteps(
if (!_.isEmpty(configChanges)) {
if (reboot) {
steps.push({ action: 'setRebootBreadcrumb' });
steps.push({ action: 'setRebootBreadcrumb', target: rebootingChanges });
}
steps.push({
@ -527,7 +537,11 @@ async function getBackendSteps(
return [
// All backend steps require a reboot except fan control
...(steps.length > 0 && rebootRequired
? [{ action: 'setRebootBreadcrumb' } as ConfigStep]
? [
{
action: 'setRebootBreadcrumb',
} as ConfigStep,
]
: []),
...steps,
];

View File

@ -657,7 +657,7 @@ export const applyTarget = async ({
// For application manager, the reboot breadcrumb should
// be set after all downloads are ready and target containers
// have been installed
if (_.every(steps, ({ action }) => action === 'noop') && rebootRequired) {
if (steps.every(({ action }) => action === 'noop') && rebootRequired) {
steps.push({
action: 'reboot',
});

View File

@ -87,5 +87,4 @@ export const touch = (file: string, time = new Date()) =>
);
// Get the system boot time as a Date object
export const getBootTime = () =>
new Date(new Date().getTime() - uptime() * 1000);
export const getBootTime = () => new Date(Date.now() - uptime() * 1000);

View File

@ -1,6 +1,7 @@
import { pathOnRoot } from '../lib/host-utils';
import * as fsUtils from '../lib/fs-utils';
import { promises as fs } from 'fs';
import * as logger from '../logging';
// This indicates the file on the host /tmp directory that
// marks the need for a reboot. Since reboot is only triggered for now
@ -11,10 +12,19 @@ const REBOOT_BREADCRUMB = pathOnRoot(
'/tmp/balena-supervisor/reboot-after-apply',
);
export async function setRebootBreadcrumb() {
export async function setRebootBreadcrumb(source: Dictionary<any> = {}) {
// Just create the file. The last step in the target state calculation will check
// the file and create a reboot step
await fsUtils.touch(REBOOT_BREADCRUMB);
logger.logSystemMessage(
`Reboot has been scheduled to apply changes: ${JSON.stringify(source)}`,
{},
'Reboot scheduled',
);
}
export async function isRebootBreadcrumbSet() {
return await fsUtils.exists(REBOOT_BREADCRUMB);
}
export async function isRebootRequired() {

View File

@ -21,6 +21,8 @@ const defaultContext = {
downloading: [] as string[],
lock: null,
hasLeftoverLocks: false,
rebootBreadcrumbSet: false,
bootTime: new Date(Date.now() - 30 * 60 * 1000), // 30 minutes ago
};
const mockLock: Lock = {
@ -2111,6 +2113,128 @@ describe('compose/app', () => {
);
expectSteps('start', steps3, 2);
});
it('should set the reboot breadcrumb after a service with `requires-reboot` has been installed', async () => {
// Container is a "run once" type of service so it has exitted.
const current = createApp({
services: [
await createService(
{
labels: { 'io.balena.update.requires-reboot': 'true' },
running: false,
},
{ state: { createdAt: new Date(), status: 'Installed' } },
),
],
networks: [DEFAULT_NETWORK],
});
// Now test that another start step is not added on this service
const target = createApp({
services: [
await createService({
labels: { 'io.balena.update.requires-reboot': 'true' },
running: true,
}),
],
isTarget: true,
});
const steps = current.nextStepsForAppUpdate(
{
...defaultContext,
rebootBreadcrumbSet: false,
// 30 minutes ago
bootTime: new Date(Date.now() - 30 * 60 * 1000),
},
target,
);
expect(steps.length).to.equal(1);
expectSteps('requireReboot', steps);
});
it('should not try to start a container with `requires-reboot` if the reboot has not taken place yet', async () => {
// Container is a "run once" type of service so it has exitted.
const current = createApp({
services: [
await createService(
{
labels: { 'io.balena.update.requires-reboot': 'true' },
running: false,
},
{ state: { createdAt: new Date(), status: 'Installed' } },
),
],
networks: [DEFAULT_NETWORK],
});
// Now test that another start step is not added on this service
const target = createApp({
services: [
await createService({
labels: { 'io.balena.update.requires-reboot': 'true' },
running: true,
}),
],
isTarget: true,
});
const steps = current.nextStepsForAppUpdate(
{
...defaultContext,
rebootBreadcrumbSet: true,
bootTime: new Date(Date.now() - 30 * 60 * 1000),
},
target,
);
expect(steps.length).to.equal(0);
expectNoStep('start', steps);
});
it('should start a container with `requires-reboot` after reboot has taken place', async () => {
// Container is a "run once" type of service so it has exitted.
const current = createApp({
services: [
await createService(
{
labels: { 'io.balena.update.requires-reboot': 'true' },
running: false,
},
// Container was created 5 minutes ago
{
state: {
createdAt: new Date(Date.now() - 5 * 60 * 1000),
status: 'Installed',
},
},
),
],
networks: [DEFAULT_NETWORK],
});
// Now test that another start step is not added on this service
const target = createApp({
services: [
await createService({
labels: { 'io.balena.update.requires-reboot': 'true' },
running: true,
}),
],
isTarget: true,
});
const steps = current.nextStepsForAppUpdate(
{
...defaultContext,
rebootBreadcrumbSet: true,
// Reboot just happened
bootTime: new Date(),
},
target,
);
expect(steps.length).to.equal(1);
expectSteps('start', steps);
});
});
describe('image state behavior', () => {