Merge pull request #1038 from balena-io/roman/suicide-prevention

Prevent supervisor from deleting itself
This commit is contained in:
Roman Mazur 2019-07-25 16:17:41 +03:00 committed by GitHub
commit 5aa9f31c3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 134 additions and 15 deletions

View File

@ -9,6 +9,7 @@ const constants = {
rootMountPoint,
databasePath:
checkString(process.env.DATABASE_PATH) || '/data/database.sqlite',
containerId: checkString(process.env.SUPERVISOR_CONTAINER_ID) || undefined,
dockerSocket: process.env.DOCKER_SOCKET || '/var/run/docker.sock',
supervisorImage:
checkString(process.env.SUPERVISOR_IMAGE) || 'resin/rpi-supervisor',

View File

@ -62,3 +62,9 @@ export class ConfigurationValidationError extends TypedError {
}
export class ImageAuthenticationError extends TypedError {}
/**
* An error thrown if our own container cannot be inspected.
* See LocalModeManager for a usage example.
*/
export class SupervisorContainerNotFoundError extends TypedError {}

View File

@ -4,6 +4,8 @@ import * as _ from 'lodash';
import Config from './config';
import Database from './db';
import * as constants from './lib/constants';
import { SupervisorContainerNotFoundError } from './lib/errors';
import log from './lib/supervisor-console';
import { Logger } from './logger';
@ -56,6 +58,9 @@ export class EngineSnapshotRecord {
) {}
}
/** Container name used to inspect own resources when container ID cannot be resolved. */
const SUPERVISOR_CONTAINER_NAME_FALLBACK = 'resin_supervisor';
/**
* This class handles any special cases necessary for switching
* modes in localMode.
@ -70,6 +75,7 @@ export class LocalModeManager {
public docker: Docker,
public logger: Logger,
public db: Database,
private containerId: string | undefined = constants.containerId,
) {}
// Indicates that switch from or to the local mode is not complete.
@ -142,6 +148,39 @@ export class LocalModeManager {
);
}
private async collectContainerResources(
nameOrId: string,
): Promise<EngineSnapshot> {
const inspectInfo = await this.docker.getContainer(nameOrId).inspect();
return new EngineSnapshot(
[inspectInfo.Id],
[inspectInfo.Image],
inspectInfo.Mounts.filter(m => m.Name != null).map(m => m.Name!),
_.map(inspectInfo.NetworkSettings.Networks, n => n.NetworkID),
);
}
// Determine what engine objects are linked to our own container.
private async collectOwnResources(): Promise<EngineSnapshot> {
try {
return this.collectContainerResources(
this.containerId || SUPERVISOR_CONTAINER_NAME_FALLBACK,
);
} catch (e) {
if (this.containerId !== undefined) {
// Inspect operation fails (container ID is out of sync?).
const fallback = SUPERVISOR_CONTAINER_NAME_FALLBACK;
log.warn(
'Supervisor container resources cannot be obtained by container ID. ' +
`Using '${fallback}' name instead.`,
e.message,
);
return this.collectContainerResources(fallback);
}
throw new SupervisorContainerNotFoundError(e);
}
}
private async cleanEngineSnapshots() {
await this.db.models('engineSnapshot').delete();
}
@ -248,11 +287,16 @@ export class LocalModeManager {
return;
}
const supervisorResources = await this.collectOwnResources();
log.debug(`${supervisorResources} are linked to current supervisor`);
log.debug(
`Leaving local mode and cleaning up objects since ${previousRecord.timestamp.toISOString()}`,
);
await this.removeLocalModeArtifacts(
currentRecord.snapshot.diff(previousRecord.snapshot),
currentRecord.snapshot
.diff(previousRecord.snapshot)
.diff(supervisorResources),
);
await this.cleanEngineSnapshots();
} catch (e) {

View File

@ -19,6 +19,8 @@ describe('LocalModeManager', () => {
let localMode: LocalModeManager;
let dockerStub: sinon.SinonStubbedInstance<Docker>;
const supervisorContainerId = 'super-container-1';
const recordsCount = async () =>
await db
.models('engineSnapshot')
@ -46,6 +48,7 @@ describe('LocalModeManager', () => {
(dockerStub as unknown) as Docker,
loggerStub,
db,
supervisorContainerId,
);
});
@ -126,6 +129,32 @@ describe('LocalModeManager', () => {
});
describe('local mode switch', () => {
// Info returned when we inspect our own container.
const supervisorContainer = {
Id: 'super-container-1',
State: {
Status: 'running',
Running: true,
},
Image: 'super-image-1',
HostConfig: {
ContainerIDFile: '/resin-data/resin-supervisor/container-id',
},
Mounts: [
{
Type: 'volume',
Name: 'super-volume-1',
},
],
NetworkSettings: {
Networks: {
'some-name': {
NetworkID: 'super-network-1',
},
},
},
};
const storeCurrentSnapshot = async (
containers: string[],
images: string[],
@ -140,20 +169,21 @@ describe('LocalModeManager', () => {
);
};
interface RemoveableObject {
interface EngineStubbedObject {
remove(): Promise<void>;
inspect(): Promise<any>;
}
// Stub get<Object> methods on docker, so we can verify remove calls.
const stubRemoveMethods = (
const stubEngineObjectMethods = (
removeThrows: boolean,
): Array<sinon.SinonStubbedInstance<RemoveableObject>> => {
): Array<sinon.SinonStubbedInstance<EngineStubbedObject>> => {
const resArray: Array<
sinon.SinonStubbedInstance<RemoveableObject>
sinon.SinonStubbedInstance<EngineStubbedObject>
> = [];
const stub = <T>(
c: sinon.StubbableType<RemoveableObject>,
c: sinon.StubbableType<EngineStubbedObject>,
type: string,
) => {
const res = sinon.createStubInstance(c);
@ -162,6 +192,11 @@ describe('LocalModeManager', () => {
} else {
res.remove.resolves();
}
if (c === Docker.Container) {
res.inspect.resolves(supervisorContainer);
}
resArray.push(res);
return (res as unknown) as T;
};
@ -187,9 +222,9 @@ describe('LocalModeManager', () => {
});
it('deletes newly created objects on local mode exit', async () => {
const removeStubs = stubRemoveMethods(false);
const removeStubs = stubEngineObjectMethods(false);
// All the objects returned by list<Objects> are not included into this snapshot.
// Hence, removal shoulf be called twice (stubbed methods return 2 objects per type).
// Hence, removal should be called twice (stubbed methods return 2 objects per type).
await storeCurrentSnapshot(
['previous-container'],
['previous-image'],
@ -203,7 +238,7 @@ describe('LocalModeManager', () => {
});
it('keeps objects from the previous snapshot on local mode exit', async () => {
const removeStubs = stubRemoveMethods(false);
const removeStubs = stubEngineObjectMethods(false);
// With this snapshot, only <object>-2 must be removed from the engine.
await storeCurrentSnapshot(
['container-1'],
@ -223,7 +258,7 @@ describe('LocalModeManager', () => {
});
it('logs but consumes cleanup errors on local mode exit', async () => {
const removeStubs = stubRemoveMethods(true);
const removeStubs = stubEngineObjectMethods(true);
// This snapshot will cause the logic to remove everything.
await storeCurrentSnapshot([], [], [], []);
@ -235,7 +270,7 @@ describe('LocalModeManager', () => {
});
it('skips cleanup without previous snapshot on local mode exit', async () => {
const removeStubs = stubRemoveMethods(false);
const removeStubs = stubEngineObjectMethods(false);
await localMode.handleLocalModeStateChange(false);
@ -247,7 +282,7 @@ describe('LocalModeManager', () => {
});
it('can be awaited', async () => {
const removeStubs = stubRemoveMethods(false);
const removeStubs = stubEngineObjectMethods(false);
await storeCurrentSnapshot([], [], [], []);
// Run asynchronously (like on config change).
@ -260,7 +295,7 @@ describe('LocalModeManager', () => {
});
it('cleans the last snapshot so that nothing is done on restart', async () => {
const removeStubs = stubRemoveMethods(false);
const removeStubs = stubEngineObjectMethods(false);
await storeCurrentSnapshot([], [], [], []);
await localMode.handleLocalModeStateChange(false);
@ -273,14 +308,14 @@ describe('LocalModeManager', () => {
// So our stubs must be called only once.
// We delete 2 objects of each type during the first call, so number of getXXX and remove calls is 2.
expect(dockerStub.getImage.callCount).to.be.equal(2);
expect(dockerStub.getContainer.callCount).to.be.equal(2);
expect(dockerStub.getContainer.callCount).to.be.equal(3); // +1 for supervisor inspect call.
expect(dockerStub.getVolume.callCount).to.be.equal(2);
expect(dockerStub.getNetwork.callCount).to.be.equal(2);
removeStubs.forEach(s => expect(s.remove.callCount).to.be.equal(2));
});
it('skips cleanup in case of data corruption', async () => {
const removeStubs = stubRemoveMethods(false);
const removeStubs = stubEngineObjectMethods(false);
await db.models('engineSnapshot').insert({
snapshot: 'bad json',
@ -296,6 +331,39 @@ describe('LocalModeManager', () => {
expect(dockerStub.getNetwork.notCalled).to.be.true;
removeStubs.forEach(s => expect(s.remove.notCalled).to.be.true);
});
describe('with supervisor being updated', () => {
beforeEach(() => {
// We make supervisor own resources to match currently listed objects.
supervisorContainer.Id = 'container-1';
supervisorContainer.Image = 'image-1';
supervisorContainer.NetworkSettings.Networks['some-name'].NetworkID =
'network-1';
supervisorContainer.Mounts[0].Name = 'volume-1';
});
it('does not delete its own object', async () => {
const removeStubs = stubEngineObjectMethods(false);
// All the current engine objects will be new, including container-1 which is the supervisor.
await storeCurrentSnapshot(
['previous-container'],
['previous-image'],
['previous-volume'],
['previous-network'],
);
await localMode.handleLocalModeStateChange(false);
// Ensure we inspect our own container.
const [, containerStub] = removeStubs;
expect(containerStub.inspect.calledOnce).to.be.true;
// Current engine objects include 2 entities of each type.
// Container-1, network-1, image-1, and volume-1 are resources associated with currently running supervisor.
// Only xxx-2 objects must be deleted.
removeStubs.forEach(s => expect(s.remove.calledOnce).to.be.true);
});
});
});
});