Uninstall containers using force remove

When uninstalling a container for a restart from the API on when
moving between releases, this prefers to force remove the container
rather than stop then remove. This has the same effect in the end, but
force remove is an atomic call, meaning if the supervisor is killed
between the stop and remove, the engine will proceed with the operation
and the supervisor will know to recover from the target state after the
restart.

Change-type: patch
Relates-to: #2257
This commit is contained in:
Felipe Lalanne 2024-05-07 16:09:44 -04:00
parent 76ba0d8f6c
commit 021fa1ccb8
3 changed files with 50 additions and 33 deletions

View File

@ -14,9 +14,6 @@ import type { DeviceLegacyReport } from '../types/state';
interface CompositionStepArgs {
stop: {
current: Service;
options?: {
wait?: boolean;
};
};
kill: {
current: Service;
@ -119,10 +116,7 @@ export function getExecutors(app: { callbacks: CompositionCallbacks }) {
stop: async (step) => {
// Should always be preceded by a takeLock step,
// so the call is executed assuming that the lock is taken.
await serviceManager.kill(step.current, {
removeContainer: false,
wait: step.options?.wait || false,
});
await serviceManager.stop(step.current);
},
kill: async (step) => {
// Should always be preceded by a takeLock step,

View File

@ -1,6 +1,5 @@
import type Dockerode from 'dockerode';
import { EventEmitter } from 'events';
import { isLeft } from 'fp-ts/lib/Either';
import JSONStream from 'JSONStream';
import _ from 'lodash';
import { promises as fs } from 'fs';
@ -10,7 +9,6 @@ import * as config from '../config';
import { docker } from '../lib/docker-utils';
import * as logger from '../logger';
import { PermissiveNumber } from '../config/types';
import * as constants from '../lib/constants';
import type { StatusCodeError } from '../lib/errors';
import {
@ -38,7 +36,6 @@ type ServiceManagerEventEmitter = StrictEventEmitter<
const events: ServiceManagerEventEmitter = new EventEmitter();
interface KillOpts {
removeContainer?: boolean;
wait?: boolean;
}
@ -201,6 +198,15 @@ export async function killAllLegacy(): Promise<void> {
}
}
export function stop(service: Service) {
if (service.containerId == null) {
throw new InternalInconsistencyError(
`Attempt to stop container without containerId! Service :${service}`,
);
}
return stopContainer(service.containerId, service);
}
export function kill(service: Service, opts: KillOpts = {}) {
if (service.containerId == null) {
throw new InternalInconsistencyError(
@ -536,10 +542,43 @@ function reportNewStatus(
);
}
async function stopContainer(
containerId: string,
service: Partial<Service> = {},
) {
logger.logSystemEvent(LogTypes.stopService, { service });
if (service.imageId != null) {
reportNewStatus(containerId, service, 'Stopping');
}
try {
await docker.getContainer(containerId).stop();
delete containerHasDied[containerId];
logger.logSystemEvent(LogTypes.stopServiceSuccess, { service });
} catch (e) {
if (isStatusError(e) && e.statusCode === 304) {
delete containerHasDied[containerId];
logger.logSystemEvent(LogTypes.stopServiceSuccess, { service });
} else if (isStatusError(e) && e.statusCode === 404) {
logger.logSystemEvent(LogTypes.stopService404Error, { service });
delete containerHasDied[containerId];
} else {
logger.logSystemEvent(LogTypes.stopServiceError, {
service,
error: e,
});
}
} finally {
if (service.imageId != null) {
reportChange(containerId);
}
}
}
async function killContainer(
containerId: string,
service: Partial<Service> = {},
{ removeContainer = true, wait = false }: KillOpts = {},
{ wait = false }: KillOpts = {},
): Promise<void> {
// To maintain compatibility of the `wait` flag, this function is not
// async, but it feels like whether or not the promise should be waited on
@ -553,29 +592,9 @@ async function killContainer(
const containerObj = docker.getContainer(containerId);
const killPromise = containerObj
.stop()
.then(() => {
if (removeContainer) {
return containerObj.remove({ v: true });
}
})
.remove({ v: true, force: true })
.catch((e) => {
// Get the statusCode from the original cause and make sure it's
// definitely an int for comparison reasons
const maybeStatusCode = PermissiveNumber.decode(e.statusCode);
if (isLeft(maybeStatusCode)) {
throw new Error(`Could not parse status code from docker error: ${e}`);
}
const statusCode = maybeStatusCode.right;
// 304 means the container was already stopped, so we can just remove it
if (statusCode === 304) {
logger.logSystemEvent(LogTypes.stopServiceNoop, { service });
// Why do we attempt to remove the container again?
if (removeContainer) {
return containerObj.remove({ v: true });
}
} else if (statusCode === 404) {
if (isStatusError(e) && e.statusCode === 304) {
// 404 means the container doesn't exist, precisely what we want!
logger.logSystemEvent(LogTypes.stopRemoveServiceNoop, {
service,

View File

@ -19,6 +19,10 @@ export const stopRemoveServiceNoop: LogType = {
eventName: 'Service already stopped and container removed',
humanName: 'Service is already stopped and the container removed',
};
export const stopService404Error: LogType = {
eventName: 'Service stop error',
humanName: 'Service container not found',
};
export const stopServiceError: LogType = {
eventName: 'Service stop error',
humanName: 'Failed to kill service',