From 7f3214195838a8bf3242ef7540b4487338687e4c Mon Sep 17 00:00:00 2001 From: Christina Ying Wang Date: Mon, 15 May 2023 17:11:57 -0700 Subject: [PATCH 1/3] Handle Engine-host race condition for "always" and "unless-stopped" restart policy There exists a race condition between Engine and a host resource that may not be immediately created. In this race condition, if a container's compose config depends on the existence of that host resource, such as a network interface, and the Engine tries to create & start the container before the host resource is created, the Engine will not reattempt to start the container, regardless of the restart policy. This is undesireable behavior but seems to be the behavior as implemented by Docker. To rectify this, the Supervisor state funnel noops for a grace period of 1 minute after starting a container to see that the container's status has become 'running`. If the container exits because of the race condition, the status becomes 'exited' and the Supervisor will attempt to generate another start step. This noop-wait-start step loop will repeat until the container is able to start. If the container is never able to start, there was a problem in the host in the creation of the host resource, and that should be fixed at the host level. This commit does not handle the case of services with restart policies "no" or "on-failure" which encounter this host race, as metadata from container inspects needs to be introduced during step calculation in order to figure out whether services with those restart policies need to be started. This will be fixed in a future PR. Change-type: patch Signed-off-by: Christina Ying Wang --- src/compose/app.ts | 63 +++- src/compose/utils.ts | 4 + .../compose/application-manager.spec.ts | 345 ++++++++++++++++++ 3 files changed, 408 insertions(+), 4 deletions(-) diff --git a/src/compose/app.ts b/src/compose/app.ts index 42a32057..be123240 100644 --- a/src/compose/app.ts +++ b/src/compose/app.ts @@ -4,7 +4,6 @@ import { promises as fs } from 'fs'; import Network from './network'; import Volume from './volume'; import Service from './service'; - import * as imageManager from './images'; import type { Image } from './images'; import { @@ -12,6 +11,7 @@ import { generateStep, CompositionStepAction, } from './composition-steps'; +import { isOlderThan } from './utils'; import * as targetStateCache from '../device-state/target-state-cache'; import * as dockerUtils from '../lib/docker-utils'; import * as constants from '../lib/constants'; @@ -26,6 +26,8 @@ import { ImageInspectInfo } from 'dockerode'; import { pathExistsOnRoot } from '../lib/host-utils'; import { isSupervisor } from '../lib/supervisor-metadata'; +const SECONDS_TO_WAIT_FOR_START = 60; + export interface AppConstructOpts { appId: number; appUuid?: string; @@ -184,7 +186,6 @@ export class App { }), ); } - return steps; } @@ -377,9 +378,22 @@ export class App { // Only start a Service if we have never started it before and the service matches target! // This is so the engine can handle the restart policy configured for the container. + // + // However, there is a certain race condition where the container's compose depends on a host + // resource that may not be there when the Engine starts the container, such as a port binding + // of 192.168.88.1:3000:3000, where 192.168.88.1 is a user-defined interface configured in system-connections + // and created by the host. This interface creation may not occur before the container creation. + // In this case, the container is created and never started, and the Engine does not attempt to restart it + // regardless of restart policy. + const requiresExplicitStart = + ['always', 'unless-stopped'].includes(serviceTarget.config.restart) && + serviceCurrent.status === 'exited' && + serviceCurrent.createdAt != null && + isOlderThan(serviceCurrent.createdAt, SECONDS_TO_WAIT_FOR_START); return ( (serviceCurrent.status === 'Installing' || - serviceCurrent.status === 'Installed') && + serviceCurrent.status === 'Installed' || + requiresExplicitStart) && isEqualExceptForRunningState(serviceCurrent, serviceTarget) ); }; @@ -401,6 +415,26 @@ export class App { ); }; + /** + * Under the race condition of a container depending on a host resource that may not be there when the Engine + * starts up, resulting in the Engine not restarting the container regardless of restart policy, the Supervisor + * state loop needs to stay alive so that the Supervisor may issue a start step after a wait. + * @param serviceCurrent + */ + const shouldWaitForStart = ( + serviceCurrent: Service, + serviceTarget: Service, + ) => { + return ( + ['always', 'unless-stopped'].includes(serviceTarget.config.restart) && + serviceCurrent.config.running === false && + serviceTarget.config.running === true && + serviceCurrent.status === 'exited' && + serviceCurrent.createdAt != null && + !isOlderThan(serviceCurrent.createdAt, SECONDS_TO_WAIT_FOR_START) + ); + }; + /** * Checks if Supervisor should keep the state loop alive while waiting on a service to stop * @param serviceCurrent @@ -425,7 +459,8 @@ export class App { !isEqualExceptForRunningState(c, t) || shouldBeStarted(c, t) || shouldBeStopped(c, t) || - shouldWaitForStop(c), + shouldWaitForStop(c) || + shouldWaitForStart(c, t), ); return { @@ -636,6 +671,26 @@ export class App { if (current.commit !== target.commit) { return generateStep('updateMetadata', { current, target }); } else if (target.config.running !== current.config.running) { + // In the case where the Engine does not start the container despite the + // restart policy (this can happen in cases of Engine race conditions with + // host resources that are slower to be created but that a service relies on), + // we need to start the container after a delay. + // FIXME: It makes absolutely zero sense that an empty string is the value + // for config.restart if the restart policy is 'no'. + // TODO: There is no way to tell from a Service instance whether a service + // exited with a non-zero exit code, therefore this method cannot determine + // whether to start a service with a restart policy of 'on-failure'. There + // is no way to get the exit message from a Service instance either, so we + // can't parse the error message to determine whether to start a service with + // a restart policy of 'no' (in case a oneshot suffered from this race condition). + if ( + ['always', 'unless-stopped'].includes(target.config.restart) && + current.status === 'exited' && + current.createdAt != null && + !isOlderThan(current.createdAt, SECONDS_TO_WAIT_FOR_START) + ) { + return generateStep('noop', {}); + } if (target.config.running) { return generateStep('start', { target }); } else { diff --git a/src/compose/utils.ts b/src/compose/utils.ts index 1b5d89f4..c4a87067 100644 --- a/src/compose/utils.ts +++ b/src/compose/utils.ts @@ -693,3 +693,7 @@ export function dockerMountToServiceMount( return mount as LongDefinition; } + +export function isOlderThan(currentTime: Date, seconds: number) { + return new Date().getTime() - currentTime.getTime() > seconds * 1000; +} diff --git a/test/integration/compose/application-manager.spec.ts b/test/integration/compose/application-manager.spec.ts index 88524ff3..95656608 100644 --- a/test/integration/compose/application-manager.spec.ts +++ b/test/integration/compose/application-manager.spec.ts @@ -1417,4 +1417,349 @@ describe('compose/application-manager', () => { }); }); }); + + // In the case where a container requires a host resource such as a network interface that is not created by the time the Engine + // comes up, the Engine will not attempt to restart the container which seems to be Docker's implemented behavior (if not the correct behavior). + // An example of a host resource would be a port binding such as 192.168.88.1:3000:3000, where the IP is an interface delayed in creation by host. + // In this case, the Supervisor needs to wait a grace period for the Engine to start the container, and if this does not occur, the Supervisor + // deduces the existence of this race condition and generates another start step after a delay (1 minute by default). + describe('handling Engine restart policy inaction when host resource required by container is delayed in creation', () => { + // Time 61 seconds ago + const date61SecondsAgo = new Date(); + date61SecondsAgo.setSeconds(date61SecondsAgo.getSeconds() - 61); + + // Time 59 seconds ago + const date50SecondsAgo = new Date(); + date50SecondsAgo.setSeconds(date50SecondsAgo.getSeconds() - 50); + + // TODO: We need to be able to start a service with restart policy "no" if that service did not start at all due to + // the host resource race condition described above. However, this is harder to parse as the containers do not include + // the proper metadata for this. The last resort would be parsing the error message that caused the container to exit. + it('should not infer any steps for a service with a status of "exited" if restart policy is "no" or "on-failure"', async () => { + // Conditions: + // - restart: "no" || "on-failure" + // - status: "exited" + const targetApps = createApps( + { + services: [ + await createService({ + image: 'image-1', + serviceName: 'one', + composition: { + restart: 'no', + }, + }), + await createService({ + image: 'image-2', + serviceName: 'two', + composition: { + restart: 'no', + }, + }), + await createService({ + image: 'image-3', + serviceName: 'three', + composition: { + restart: 'on-failure', + }, + }), + await createService({ + image: 'image-4', + serviceName: 'four', + composition: { + restart: 'on-failure', + }, + }), + ], + networks: [DEFAULT_NETWORK], + }, + true, + ); + + const { currentApps, availableImages, downloading, containerIdsByAppId } = + createCurrentState({ + services: [ + await createService( + { + image: 'image-1', + serviceName: 'one', + composition: { + restart: 'no', + }, + }, + { + state: { + status: 'exited', + // Should not generate noop if exited within 1 minute + createdAt: date50SecondsAgo, + }, + }, + ), + await createService( + { + image: 'image-2', + serviceName: 'two', + composition: { + restart: 'no', + }, + }, + { + state: { + status: 'exited', + // Should not generate start if exited more than 1 minute ago + createdAt: date61SecondsAgo, + }, + }, + ), + await createService( + { + image: 'image-3', + serviceName: 'three', + composition: { + restart: 'on-failure', + }, + }, + { + state: { + status: 'exited', + // Should not generate noop if exited within 1 minute + createdAt: date50SecondsAgo, + }, + }, + ), + await createService( + { + image: 'image-4', + serviceName: 'four', + composition: { + restart: 'on-failure', + }, + }, + { + state: { + status: 'exited', + // Should not generate start if exited more than 1 minute ago + createdAt: date61SecondsAgo, + }, + }, + ), + ], + networks: [DEFAULT_NETWORK], + images: [ + createImage({ + name: 'image-1', + serviceName: 'one', + }), + createImage({ + name: 'image-2', + serviceName: 'two', + }), + createImage({ + name: 'image-3', + serviceName: 'three', + }), + createImage({ + name: 'image-4', + serviceName: 'four', + }), + ], + }); + + const [...steps] = await applicationManager.inferNextSteps( + currentApps, + targetApps, + { + downloading, + availableImages, + containerIdsByAppId, + }, + ); + + expect(steps).to.have.lengthOf(0); + }); + + it('should infer a noop step for a service that was created <=1 min ago with status of "exited" if restart policy is "always" or "unless-stopped"', async () => { + // Conditions: + // - restart: "always" || "unless-stopped" + // - status: "exited" + // - createdAt: <= SECONDS_TO_WAIT_FOR_START ago + const targetApps = createApps( + { + services: [ + await createService({ + image: 'image-1', + serviceName: 'one', + composition: { + restart: 'always', + }, + }), + await createService({ + image: 'image-2', + serviceName: 'two', + composition: { + restart: 'unless-stopped', + }, + }), + ], + networks: [DEFAULT_NETWORK], + }, + true, + ); + + const { currentApps, availableImages, downloading, containerIdsByAppId } = + createCurrentState({ + services: [ + await createService( + { + image: 'image-1', + serviceName: 'one', + running: false, + composition: { + restart: 'always', + }, + }, + { + state: { + status: 'exited', + createdAt: date50SecondsAgo, + }, + }, + ), + await createService( + { + image: 'image-2', + serviceName: 'two', + running: false, + composition: { + restart: 'unless-stopped', + }, + }, + { + state: { + status: 'exited', + createdAt: date50SecondsAgo, + }, + }, + ), + ], + networks: [DEFAULT_NETWORK], + images: [ + createImage({ + name: 'image-1', + serviceName: 'one', + }), + createImage({ + name: 'image-2', + serviceName: 'two', + }), + ], + }); + + const [noopStep1, noopStep2, ...nextSteps] = + await applicationManager.inferNextSteps(currentApps, targetApps, { + downloading, + availableImages, + containerIdsByAppId, + }); + + expect(noopStep1).to.have.property('action').that.equals('noop'); + expect(noopStep2).to.have.property('action').that.equals('noop'); + + expect(nextSteps).to.have.lengthOf(0); + }); + + it('should infer a start step for a service that was created >1 min ago with status of "exited" if restart policy is "always" or "unless-stopped"', async () => { + // Conditions: + // - restart: "always" || "unless-stopped" + // - status: "exited" + // - createdAt: <= SECONDS_TO_WAIT_FOR_START ago + const targetApps = createApps( + { + services: [ + await createService({ + image: 'image-1', + serviceName: 'one', + composition: { + restart: 'always', + }, + }), + await createService({ + image: 'image-2', + serviceName: 'two', + composition: { + restart: 'unless-stopped', + }, + }), + ], + networks: [DEFAULT_NETWORK], + }, + true, + ); + + const { currentApps, availableImages, downloading, containerIdsByAppId } = + createCurrentState({ + services: [ + await createService( + { + image: 'image-1', + serviceName: 'one', + running: false, + composition: { + restart: 'always', + }, + }, + { + state: { + status: 'exited', + createdAt: date61SecondsAgo, + }, + }, + ), + await createService( + { + image: 'image-2', + serviceName: 'two', + running: false, + composition: { + restart: 'unless-stopped', + }, + }, + { + state: { + status: 'exited', + createdAt: date61SecondsAgo, + }, + }, + ), + ], + networks: [DEFAULT_NETWORK], + images: [ + createImage({ + name: 'image-1', + serviceName: 'one', + }), + createImage({ + name: 'image-2', + serviceName: 'two', + }), + ], + }); + + const [startStep1, startStep2, ...nextSteps] = + await applicationManager.inferNextSteps(currentApps, targetApps, { + downloading, + availableImages, + containerIdsByAppId, + }); + + [startStep1, startStep2].forEach((step) => { + expect(step).to.have.property('action').that.equals('start'); + expect(step) + .to.have.property('target') + .that.has.property('serviceName') + .that.is.oneOf(['one', 'two']); + }); + expect(nextSteps).to.have.lengthOf(0); + }); + }); }); From 95f3e13d50877a7366826e187b9a7a388a24bfc5 Mon Sep 17 00:00:00 2001 From: Christina Ying Wang Date: Tue, 30 May 2023 15:17:11 -0700 Subject: [PATCH 2/3] Add extra delay after state engine integration tests This ensures target state has settled (since it seems that the 'applied' status that's reported isn't 100% accurate and the actual Engine state may lag behind slightly) Signed-off-by: Christina Ying Wang --- test/integration/state-engine.spec.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/integration/state-engine.spec.ts b/test/integration/state-engine.spec.ts index c894dacf..4b49c5c0 100644 --- a/test/integration/state-engine.spec.ts +++ b/test/integration/state-engine.spec.ts @@ -49,6 +49,8 @@ const setTargetState = async ( while (true) { const status = await getStatus(); if (status.appState === 'applied') { + // Wait a tiny bit more after applied for state to settle + await delay(1000); clearTimeout(timer); resolve(true); break; From 2537eb818993ee25ba6d4d9a4a0464ba5bbfa5f6 Mon Sep 17 00:00:00 2001 From: Christina Ying Wang Date: Mon, 5 Jun 2023 11:05:58 -0700 Subject: [PATCH 3/3] Handle the case of 'on-failure' restart policy As explained in the comments of this commit, a container with the restart policy of 'on-failure' with a non-zero exit code matches the conditions for the race, so the Supervisor will also attempt to start it. A container with the 'no' restart policy that has been started once will not be started again. If a container with 'no' has never been started, its service status will be 'Installed' and the Supervisor will already try to start it until success, so the service with 'no' doesn't require special handling. Signed-off-by: Christina Ying Wang --- src/compose/app.ts | 181 +++++++++++++++-------------- src/compose/application-manager.ts | 4 +- src/compose/service-manager.ts | 8 +- src/compose/utils.ts | 5 +- test/unit/compose/app.spec.ts | 157 ++++++++++++++----------- 5 files changed, 195 insertions(+), 160 deletions(-) diff --git a/src/compose/app.ts b/src/compose/app.ts index be123240..c45751f2 100644 --- a/src/compose/app.ts +++ b/src/compose/app.ts @@ -1,5 +1,6 @@ import * as _ from 'lodash'; import { promises as fs } from 'fs'; +import { ImageInspectInfo } from 'dockerode'; import Network from './network'; import Volume from './volume'; @@ -12,8 +13,9 @@ import { CompositionStepAction, } from './composition-steps'; import { isOlderThan } from './utils'; +import { inspectByDockerContainerId } from './service-manager'; import * as targetStateCache from '../device-state/target-state-cache'; -import * as dockerUtils from '../lib/docker-utils'; +import { getNetworkGateway } from '../lib/docker-utils'; import * as constants from '../lib/constants'; import { getStepsFromStrategy } from './update-strategies'; @@ -22,7 +24,6 @@ import { InternalInconsistencyError, isNotFoundError } from '../lib/errors'; import * as config from '../config'; import { checkTruthy, checkString } from '../lib/validation'; import { ServiceComposeConfig, DeviceMetadata } from './types/service'; -import { ImageInspectInfo } from 'dockerode'; import { pathExistsOnRoot } from '../lib/host-utils'; import { isSupervisor } from '../lib/supervisor-metadata'; @@ -101,10 +102,10 @@ export class App { } } - public nextStepsForAppUpdate( + public async nextStepsForAppUpdate( state: UpdateState, target: App, - ): CompositionStep[] { + ): Promise { // Check to see if we need to polyfill in some "new" data for legacy services this.migrateLegacy(target); @@ -130,11 +131,12 @@ export class App { } } - const { removePairs, installPairs, updatePairs } = this.compareServices( - this.services, - target.services, - state.containerIds, - ); + const { removePairs, installPairs, updatePairs } = + await this.compareServices( + this.services, + target.services, + state.containerIds, + ); for (const { current: svc } of removePairs) { // All removes get a kill action if they're not already stopping @@ -147,18 +149,19 @@ export class App { // For every service which needs to be updated, update via update strategy. const servicePairs = updatePairs.concat(installPairs); + const serviceSteps = await Promise.all( + servicePairs.map((pair) => + this.generateStepsForService(pair, { + ...state, + servicePairs, + targetApp: target, + networkPairs: networkChanges, + volumePairs: volumeChanges, + }), + ), + ); steps = steps.concat( - servicePairs - .map((pair) => - this.generateStepsForService(pair, { - ...state, - servicePairs, - targetApp: target, - networkPairs: networkChanges, - volumePairs: volumeChanges, - }), - ) - .filter((step) => step != null) as CompositionStep[], + serviceSteps.filter((step) => step != null) as CompositionStep[], ); // Generate volume steps @@ -297,15 +300,15 @@ export class App { return outputs; } - private compareServices( + private async compareServices( current: Service[], target: Service[], containerIds: Dictionary, - ): { + ): Promise<{ installPairs: Array>; removePairs: Array>; updatePairs: Array>; - } { + }> { const currentByServiceName = _.keyBy(current, 'serviceName'); const targetByServiceName = _.keyBy(target, 'serviceName'); @@ -364,7 +367,7 @@ export class App { * @param serviceCurrent * @param serviceTarget */ - const shouldBeStarted = ( + const shouldBeStarted = async ( serviceCurrent: Service, serviceTarget: Service, ) => { @@ -385,15 +388,13 @@ export class App { // and created by the host. This interface creation may not occur before the container creation. // In this case, the container is created and never started, and the Engine does not attempt to restart it // regardless of restart policy. - const requiresExplicitStart = - ['always', 'unless-stopped'].includes(serviceTarget.config.restart) && - serviceCurrent.status === 'exited' && - serviceCurrent.createdAt != null && - isOlderThan(serviceCurrent.createdAt, SECONDS_TO_WAIT_FOR_START); return ( (serviceCurrent.status === 'Installing' || serviceCurrent.status === 'Installed' || - requiresExplicitStart) && + (await this.requirementsMetForSpecialStart( + serviceCurrent, + serviceTarget, + ))) && isEqualExceptForRunningState(serviceCurrent, serviceTarget) ); }; @@ -415,26 +416,6 @@ export class App { ); }; - /** - * Under the race condition of a container depending on a host resource that may not be there when the Engine - * starts up, resulting in the Engine not restarting the container regardless of restart policy, the Supervisor - * state loop needs to stay alive so that the Supervisor may issue a start step after a wait. - * @param serviceCurrent - */ - const shouldWaitForStart = ( - serviceCurrent: Service, - serviceTarget: Service, - ) => { - return ( - ['always', 'unless-stopped'].includes(serviceTarget.config.restart) && - serviceCurrent.config.running === false && - serviceTarget.config.running === true && - serviceCurrent.status === 'exited' && - serviceCurrent.createdAt != null && - !isOlderThan(serviceCurrent.createdAt, SECONDS_TO_WAIT_FOR_START) - ); - }; - /** * Checks if Supervisor should keep the state loop alive while waiting on a service to stop * @param serviceCurrent @@ -449,19 +430,24 @@ export class App { /** * Filter all the services which should be updated due to run state change, or config mismatch. */ - const toBeUpdated = maybeUpdate - .map((serviceName) => ({ - current: currentByServiceName[serviceName], - target: targetByServiceName[serviceName], - })) - .filter( - ({ current: c, target: t }) => - !isEqualExceptForRunningState(c, t) || - shouldBeStarted(c, t) || - shouldBeStopped(c, t) || - shouldWaitForStop(c) || - shouldWaitForStart(c, t), - ); + const satisfiesRequirementsForUpdate = async (c: Service, t: Service) => + !isEqualExceptForRunningState(c, t) || + (await shouldBeStarted(c, t)) || + shouldBeStopped(c, t) || + shouldWaitForStop(c); + + const maybeUpdatePairs = maybeUpdate.map((serviceName) => ({ + current: currentByServiceName[serviceName], + target: targetByServiceName[serviceName], + })); + const maybeUpdatePairsFiltered = await Promise.all( + maybeUpdatePairs.map(({ current: c, target: t }) => + satisfiesRequirementsForUpdate(c, t), + ), + ); + const toBeUpdated = maybeUpdatePairs.filter( + (__, idx) => maybeUpdatePairsFiltered[idx], + ); return { installPairs: toBeInstalled, @@ -527,7 +513,7 @@ export class App { return steps; } - private generateStepsForService( + private async generateStepsForService( { current, target }: ChangingPair, context: { availableImages: Image[]; @@ -538,7 +524,7 @@ export class App { volumePairs: Array>; servicePairs: Array>; }, - ): Nullable { + ): Promise> { if (current?.status === 'Stopping') { // Theres a kill step happening already, emit a noop to ensure we stay alive while // this happens @@ -592,7 +578,7 @@ export class App { current.isEqualConfig(target, context.containerIds) ) { // we're only starting/stopping a service - return this.generateContainerStep(current, target); + return await this.generateContainerStep(current, target); } let strategy = @@ -666,27 +652,52 @@ export class App { ); } - private generateContainerStep(current: Service, target: Service) { + // In the case where the Engine does not start the container despite the + // restart policy (this can happen in cases of Engine race conditions with + // host resources that are slower to be created but that a service relies on), + // we need to start the container after a delay. + private async requirementsMetForSpecialStart( + current: Service, + target: Service, + ): Promise { + // Shortcut the Engine inspect queries if status isn't exited + if ( + current.status !== 'exited' || + current.config.running !== false || + target.config.running !== true + ) { + return false; + } + let conditionsMetWithRestartPolicy = ['always', 'unless-stopped'].includes( + target.config.restart, + ); + + if (current.containerId != null && !conditionsMetWithRestartPolicy) { + const containerInspect = await inspectByDockerContainerId( + current.containerId, + ); + // If the container has previously been started but exited unsuccessfully, it needs to be started. + // If the container has a restart policy of 'no' and has never been started, it + // should also be started, however, in that case, the status of the container will + // be 'Installed' and the state funnel will already be trying to start it until success, + // so we don't need to add any additional handling. + if ( + target.config.restart === 'on-failure' && + containerInspect.State.ExitCode !== 0 + ) { + conditionsMetWithRestartPolicy = true; + } + } + return conditionsMetWithRestartPolicy; + } + + private async generateContainerStep(current: Service, target: Service) { // if the services release doesn't match, then rename the container... if (current.commit !== target.commit) { return generateStep('updateMetadata', { current, target }); } else if (target.config.running !== current.config.running) { - // In the case where the Engine does not start the container despite the - // restart policy (this can happen in cases of Engine race conditions with - // host resources that are slower to be created but that a service relies on), - // we need to start the container after a delay. - // FIXME: It makes absolutely zero sense that an empty string is the value - // for config.restart if the restart policy is 'no'. - // TODO: There is no way to tell from a Service instance whether a service - // exited with a non-zero exit code, therefore this method cannot determine - // whether to start a service with a restart policy of 'on-failure'. There - // is no way to get the exit message from a Service instance either, so we - // can't parse the error message to determine whether to start a service with - // a restart policy of 'no' (in case a oneshot suffered from this race condition). if ( - ['always', 'unless-stopped'].includes(target.config.restart) && - current.status === 'exited' && - current.createdAt != null && + (await this.requirementsMetForSpecialStart(current, target)) && !isOlderThan(current.createdAt, SECONDS_TO_WAIT_FOR_START) ) { return generateStep('noop', {}); @@ -827,9 +838,9 @@ export class App { const [opts, supervisorApiHost, hostPathExists, hostname] = await Promise.all([ config.get('extendedEnvOptions'), - dockerUtils - .getNetworkGateway(constants.supervisorNetworkInterface) - .catch(() => '127.0.0.1'), + getNetworkGateway(constants.supervisorNetworkInterface).catch( + () => '127.0.0.1', + ), (async () => ({ firmware: await pathExistsOnRoot('/lib/firmware'), modules: await pathExistsOnRoot('/lib/modules'), diff --git a/src/compose/application-manager.ts b/src/compose/application-manager.ts index f25ee913..f352938e 100644 --- a/src/compose/application-manager.ts +++ b/src/compose/application-manager.ts @@ -207,7 +207,7 @@ export async function inferNextSteps( // do to move to the target state for (const id of targetAndCurrent) { steps = steps.concat( - currentApps[id].nextStepsForAppUpdate( + await currentApps[id].nextStepsForAppUpdate( { availableImages, containerIds: containerIdsByAppId[id], @@ -243,7 +243,7 @@ export async function inferNextSteps( false, ); steps = steps.concat( - emptyCurrent.nextStepsForAppUpdate( + await emptyCurrent.nextStepsForAppUpdate( { availableImages, containerIds: containerIdsByAppId[id] ?? {}, diff --git a/src/compose/service-manager.ts b/src/compose/service-manager.ts index 90279e06..8e4e679e 100644 --- a/src/compose/service-manager.ts +++ b/src/compose/service-manager.ts @@ -135,7 +135,7 @@ export async function getState() { export async function getByDockerContainerId( containerId: string, ): Promise { - const container = await docker.getContainer(containerId).inspect(); + const container = await inspectByDockerContainerId(containerId); if ( container.Config.Labels['io.balena.supervised'] == null && container.Config.Labels['io.resin.supervised'] == null @@ -145,6 +145,12 @@ export async function getByDockerContainerId( return Service.fromDockerContainer(container); } +export async function inspectByDockerContainerId( + containerId: string, +): Promise { + return await docker.getContainer(containerId).inspect(); +} + export async function updateMetadata(service: Service, target: Service) { const svc = await get(service); if (svc.containerId == null) { diff --git a/src/compose/utils.ts b/src/compose/utils.ts index c4a87067..413e5804 100644 --- a/src/compose/utils.ts +++ b/src/compose/utils.ts @@ -694,6 +694,9 @@ export function dockerMountToServiceMount( return mount as LongDefinition; } -export function isOlderThan(currentTime: Date, seconds: number) { +export function isOlderThan(currentTime: Date | null, seconds: number) { + if (currentTime == null) { + return false; + } return new Date().getTime() - currentTime.getTime() > seconds * 1000; } diff --git a/test/unit/compose/app.spec.ts b/test/unit/compose/app.spec.ts index d9abc80a..d6237295 100644 --- a/test/unit/compose/app.spec.ts +++ b/test/unit/compose/app.spec.ts @@ -111,7 +111,7 @@ const defaultNetwork = Network.fromComposeObject('default', 1, 'appuuid', {}); describe('compose/app', () => { describe('volume state behavior', () => { - it('should correctly infer a volume create step', () => { + it('should correctly infer a volume create step', async () => { // Setup current and target apps const current = createApp(); const target = createApp({ @@ -120,7 +120,7 @@ describe('compose/app', () => { }); // Calculate the steps - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); // Check that a createVolume step has been created const [createVolumeStep] = expectSteps('createVolume', steps); @@ -129,7 +129,7 @@ describe('compose/app', () => { .that.deep.includes({ name: 'test-volume' }); }); - it('should correctly infer more than one volume create step', () => { + it('should correctly infer more than one volume create step', async () => { const current = createApp(); const target = createApp({ volumes: [ @@ -139,7 +139,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); // Check that 2 createVolume steps are found const createVolumeSteps = expectSteps('createVolume', steps, 2); @@ -160,7 +160,7 @@ describe('compose/app', () => { }); // We don't remove volumes until the end - it('should not infer a volume remove step when the app is still referenced', () => { + it('should not infer a volume remove step when the app is still referenced', async () => { const current = createApp({ volumes: [ Volume.fromComposeObject('test-volume', 1, 'deadbeef'), @@ -172,11 +172,11 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); expectNoStep('removeVolume', steps); }); - it('should correctly infer volume recreation steps', () => { + it('should correctly infer volume recreation steps', async () => { const current = createApp({ volumes: [Volume.fromComposeObject('test-volume', 1, 'deadbeef')], }); @@ -190,7 +190,7 @@ describe('compose/app', () => { }); // First step should create a volume removal step - const stepsForRemoval = current.nextStepsForAppUpdate( + const stepsForRemoval = await current.nextStepsForAppUpdate( defaultContext, target, ); @@ -212,7 +212,7 @@ describe('compose/app', () => { }); // This test is extra since we have already tested that the volume gets created - const stepsForCreation = intermediate.nextStepsForAppUpdate( + const stepsForCreation = await intermediate.nextStepsForAppUpdate( defaultContext, target, ); @@ -254,7 +254,7 @@ describe('compose/app', () => { }); // Calculate steps - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [killStep] = expectSteps('kill', steps); expect(killStep) @@ -294,7 +294,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); expectNoStep('kill', steps); }); @@ -333,7 +333,7 @@ describe('compose/app', () => { }); // Step 1: kill - const steps = current.nextStepsForAppUpdate( + const steps = await current.nextStepsForAppUpdate( contextWithImages, intermediateTarget, ); @@ -341,7 +341,7 @@ describe('compose/app', () => { // Step 2: noop (service is stopping) service.status = 'Stopping'; - const secondStageSteps = current.nextStepsForAppUpdate( + const secondStageSteps = await current.nextStepsForAppUpdate( contextWithImages, intermediateTarget, ); @@ -355,7 +355,7 @@ describe('compose/app', () => { volumes: [volume], }); expect( - currentWithServiceRemoved.nextStepsForAppUpdate( + await currentWithServiceRemoved.nextStepsForAppUpdate( contextWithImages, intermediateTarget, ), @@ -377,7 +377,7 @@ describe('compose/app', () => { isTarget: true, }); const recreateVolumeSteps = - currentWithVolumesRemoved.nextStepsForAppUpdate( + await currentWithVolumesRemoved.nextStepsForAppUpdate( contextWithImages, target, ); @@ -393,7 +393,7 @@ describe('compose/app', () => { }); const createServiceSteps = - currentWithVolumeRecreated.nextStepsForAppUpdate( + await currentWithVolumeRecreated.nextStepsForAppUpdate( contextWithImages, target, ); @@ -402,14 +402,14 @@ describe('compose/app', () => { }); describe('network state behavior', () => { - it('should correctly infer a network create step', () => { + it('should correctly infer a network create step', async () => { const current = createApp({ networks: [] }); const target = createApp({ networks: [Network.fromComposeObject('default', 1, 'deadbeef', {})], isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [createNetworkStep] = expectSteps('createNetwork', steps); expect(createNetworkStep).to.have.property('target').that.deep.includes({ @@ -417,7 +417,7 @@ describe('compose/app', () => { }); }); - it('should correctly infer a network remove step', () => { + it('should correctly infer a network remove step', async () => { const current = createApp({ networks: [ Network.fromComposeObject('test-network', 1, 'deadbeef', {}), @@ -425,7 +425,7 @@ describe('compose/app', () => { }); const target = createApp({ networks: [], isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeNetworkStep] = expectSteps('removeNetwork', steps); @@ -434,7 +434,7 @@ describe('compose/app', () => { }); }); - it('should correctly remove default duplicate networks', () => { + it('should correctly remove default duplicate networks', async () => { const current = createApp({ networks: [defaultNetwork, defaultNetwork], }); @@ -443,7 +443,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeNetworkStep] = expectSteps('removeNetwork', steps); @@ -452,7 +452,7 @@ describe('compose/app', () => { }); }); - it('should correctly remove duplicate networks', () => { + it('should correctly remove duplicate networks', async () => { const current = createApp({ networks: [ Network.fromComposeObject('test-network', 1, 'deadbeef', {}), @@ -468,7 +468,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeNetworkStep] = expectSteps('removeNetwork', steps); @@ -477,7 +477,7 @@ describe('compose/app', () => { }); }); - it('should ignore the duplicates if there are changes already', () => { + it('should ignore the duplicates if there are changes already', async () => { const current = createApp({ networks: [ Network.fromComposeObject('test-network', 1, 'deadbeef', {}), @@ -494,7 +494,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeNetworkStep] = expectSteps('removeNetwork', steps); expect(removeNetworkStep).to.have.property('current').that.deep.includes({ @@ -534,7 +534,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeNetworkStep] = expectSteps('kill', steps); @@ -543,7 +543,7 @@ describe('compose/app', () => { }); }); - it('should correctly infer more than one network removal step', () => { + it('should correctly infer more than one network removal step', async () => { const current = createApp({ networks: [ Network.fromComposeObject('test-network', 1, 'deadbeef', {}), @@ -553,7 +553,7 @@ describe('compose/app', () => { }); const target = createApp({ networks: [], isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [first, second] = expectSteps('removeNetwork', steps, 2); @@ -565,7 +565,7 @@ describe('compose/app', () => { }); }); - it('should correctly infer a network recreation step', () => { + it('should correctly infer a network recreation step', async () => { const current = createApp({ networks: [ Network.fromComposeObject('test-network', 1, 'deadbeef', {}), @@ -580,7 +580,7 @@ describe('compose/app', () => { isTarget: true, }); - const stepsForRemoval = current.nextStepsForAppUpdate( + const stepsForRemoval = await current.nextStepsForAppUpdate( defaultContext, target, ); @@ -595,7 +595,7 @@ describe('compose/app', () => { networks: [], }); - const stepsForCreation = intermediate.nextStepsForAppUpdate( + const stepsForCreation = await intermediate.nextStepsForAppUpdate( defaultContext, target, ); @@ -641,7 +641,7 @@ describe('compose/app', () => { const availableImages = [createImage({ appUuid: 'deadbeef' })]; - const steps = current.nextStepsForAppUpdate( + const steps = await current.nextStepsForAppUpdate( { ...defaultContext, availableImages }, target, ); @@ -674,7 +674,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [killStep] = expectSteps('kill', steps); expect(killStep) @@ -730,7 +730,7 @@ describe('compose/app', () => { createImage({ appId: 1, serviceName: 'two', name: 'alpine' }), ]; - const steps = current.nextStepsForAppUpdate( + const steps = await current.nextStepsForAppUpdate( { ...defaultContext, availableImages }, target, ); @@ -793,7 +793,7 @@ describe('compose/app', () => { createImage({ appId: 1, serviceName: 'two', name: 'alpine' }), ]; - const steps = current.nextStepsForAppUpdate( + const steps = await current.nextStepsForAppUpdate( { ...defaultContext, availableImages }, target, ); @@ -808,11 +808,11 @@ describe('compose/app', () => { expectNoStep('removeNetwork', steps); }); - it('should create the default network if it does not exist', () => { + it('should create the default network if it does not exist', async () => { const current = createApp({ networks: [] }); const target = createApp({ networks: [], isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); // A default network should always be created const [createNetworkStep] = expectSteps('createNetwork', steps); @@ -821,13 +821,13 @@ describe('compose/app', () => { .that.deep.includes({ name: 'default' }); }); - it('should not create the default network if it already exists', () => { + it('should not create the default network if it already exists', async () => { const current = createApp({ networks: [Network.fromComposeObject('default', 1, 'deadbeef', {})], }); const target = createApp({ networks: [], isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); // The network should not be created again expectNoStep('createNetwork', steps); @@ -854,7 +854,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [createNetworkStep] = expectSteps('createNetwork', steps); expect(createNetworkStep) @@ -883,7 +883,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [createNetworkStep] = expectSteps('createNetwork', steps); expect(createNetworkStep) @@ -896,7 +896,7 @@ describe('compose/app', () => { const current = createApp({}); const target = createApp({ isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [createNetworkStep] = expectSteps('createNetwork', steps); expect(createNetworkStep) @@ -921,7 +921,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [killStep] = expectSteps('kill', steps); expect(killStep) .to.have.property('current') @@ -939,7 +939,7 @@ describe('compose/app', () => { }); const target = createApp({ services: [], isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); expectSteps('noop', steps); // Kill was already emitted for this service @@ -959,7 +959,7 @@ describe('compose/app', () => { services: [await createService({ serviceName: 'main', running: true })], }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); expectSteps('noop', steps); }); @@ -977,7 +977,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeStep] = expectSteps('remove', steps); expect(removeStep) @@ -996,7 +996,7 @@ describe('compose/app', () => { }); const target = createApp({ services: [], isTarget: true }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [removeStep] = expectSteps('remove', steps); expect(removeStep) @@ -1013,7 +1013,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate( + const steps = await current.nextStepsForAppUpdate( { ...defaultContext, ...{ downloading: ['main-image'] } }, target, ); @@ -1034,7 +1034,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [updateMetadataStep] = expectSteps('updateMetadata', steps); expect(updateMetadataStep) @@ -1057,7 +1057,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [stopStep] = expectSteps('stop', steps); expect(stopStep) .to.have.property('current') @@ -1086,7 +1086,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); expectNoStep('start', steps); }); @@ -1118,7 +1118,7 @@ describe('compose/app', () => { }); // should see a 'stop' - const stepsToIntermediate = current.nextStepsForAppUpdate( + const stepsToIntermediate = await current.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1135,7 +1135,7 @@ describe('compose/app', () => { }); // now should see a 'start' - const stepsToTarget = intermediate.nextStepsForAppUpdate( + const stepsToTarget = await intermediate.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1193,7 +1193,7 @@ describe('compose/app', () => { isTarget: true, }); - const stepsToIntermediate = current.nextStepsForAppUpdate( + const stepsToIntermediate = await current.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1216,7 +1216,7 @@ describe('compose/app', () => { }); // we should now see a start for the 'main' service... - const stepsToTarget = intermediate.nextStepsForAppUpdate( + const stepsToTarget = await intermediate.nextStepsForAppUpdate( { ...contextWithImages, ...{ containerIds: { dep: 'dep-id' } } }, target, ); @@ -1248,7 +1248,10 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(contextWithImages, target); + const steps = await current.nextStepsForAppUpdate( + contextWithImages, + target, + ); // There should be no steps since the engine manages restart policy for stopped containers expect(steps.length).to.equal(0); @@ -1292,7 +1295,7 @@ describe('compose/app', () => { isTarget: true, }); - const stepsToIntermediate = current.nextStepsForAppUpdate( + const stepsToIntermediate = await current.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1308,7 +1311,7 @@ describe('compose/app', () => { networks: [defaultNetwork], }); - const stepsToTarget = intermediate.nextStepsForAppUpdate( + const stepsToTarget = await intermediate.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1379,7 +1382,10 @@ describe('compose/app', () => { }); // No kill steps should be generated - const steps = current.nextStepsForAppUpdate(contextWithImages, target); + const steps = await current.nextStepsForAppUpdate( + contextWithImages, + target, + ); expectNoStep('kill', steps); }); @@ -1421,7 +1427,7 @@ describe('compose/app', () => { isTarget: true, }); - const stepsFirstTry = current.nextStepsForAppUpdate( + const stepsFirstTry = await current.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1432,7 +1438,7 @@ describe('compose/app', () => { .that.deep.includes({ serviceName: 'main' }); // if at first you don't succeed - const stepsSecondTry = current.nextStepsForAppUpdate( + const stepsSecondTry = await current.nextStepsForAppUpdate( contextWithImages, target, ); @@ -1464,7 +1470,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [killStep] = expectSteps('kill', steps); expect(killStep) .to.have.property('current') @@ -1487,7 +1493,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [createNetworkStep] = expectSteps('createNetwork', steps); expect(createNetworkStep) .to.have.property('target') @@ -1528,7 +1534,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); expectSteps('kill', steps, 2); }); @@ -1590,7 +1596,10 @@ describe('compose/app', () => { }); // No kill steps should be generated - const steps = current.nextStepsForAppUpdate(contextWithImages, target); + const steps = await current.nextStepsForAppUpdate( + contextWithImages, + target, + ); expectNoStep('kill', steps); }); @@ -1629,7 +1638,10 @@ describe('compose/app', () => { }); // No kill steps should be generated - const steps = current.nextStepsForAppUpdate(contextWithImages, target); + const steps = await current.nextStepsForAppUpdate( + contextWithImages, + target, + ); expectNoStep('start', steps); }); @@ -1673,7 +1685,10 @@ describe('compose/app', () => { }); // No kill steps should be generated - const steps = current.nextStepsForAppUpdate(contextWithImages, target); + const steps = await current.nextStepsForAppUpdate( + contextWithImages, + target, + ); expectSteps('start', steps, 2); }); }); @@ -1686,7 +1701,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [fetchStep] = expectSteps('fetch', steps); expect(fetchStep) .to.have.property('image') @@ -1708,7 +1723,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate( + const steps = await current.nextStepsForAppUpdate( contextWithDownloading, target, ); @@ -1724,7 +1739,7 @@ describe('compose/app', () => { isTarget: true, }); - const steps = current.nextStepsForAppUpdate(defaultContext, target); + const steps = await current.nextStepsForAppUpdate(defaultContext, target); const [fetchStep] = expectSteps('fetch', steps); expect(fetchStep)