mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-01-31 08:25:36 +00:00
Prevent updates/reboots with locks when HUP breadcrumbs present
On HUP, some healthceck services need to complete before it's safe for the Supervisor to reboot the device when applying state changes. rollback-{health|altboot}-breadcrumb are the two files that Supervisor looks for and locks the device on when present in this patch. Not closing issue 1459 because there is a possible case where, on altboot rollback, the breadcrumbs are not present. 1459 may be closed when this edge case is investigated. Change-type: patch Connects-to: #1459 See: https://www.flowdock.com/app/rulemotion/r-supervisor/threads/cL7YfNOLSfTPfw05h59GEW0kfOt Signed-off-by: Christina Wang <christina@balena.io>
This commit is contained in:
parent
5223262557
commit
a9028e58ec
@ -638,6 +638,7 @@ export function reportCurrentState(
|
||||
}
|
||||
|
||||
export async function reboot(force?: boolean, skipLock?: boolean) {
|
||||
await updateLock.ensureNoHUPBreadcrumbsOnHost();
|
||||
await applicationManager.stopAll({ force, skipLock });
|
||||
logger.logSystemMessage('Rebooting', {}, 'Reboot');
|
||||
const $reboot = await dbus.reboot();
|
||||
@ -647,6 +648,7 @@ export async function reboot(force?: boolean, skipLock?: boolean) {
|
||||
}
|
||||
|
||||
export async function shutdown(force?: boolean, skipLock?: boolean) {
|
||||
await updateLock.ensureNoHUPBreadcrumbsOnHost();
|
||||
await applicationManager.stopAll({ force, skipLock });
|
||||
logger.logSystemMessage('Shutting down', {}, 'Shutdown');
|
||||
const $shutdown = await dbus.shutdown();
|
||||
@ -757,7 +759,7 @@ function applyError(
|
||||
if (err instanceof UpdatesLockedError) {
|
||||
const message = `Updates are locked, retrying in ${prettyMs(delay, {
|
||||
compact: true,
|
||||
})}...`;
|
||||
})}. Reason: ${err.message}`;
|
||||
logger.logSystemMessage(message, {}, 'updateLocked', false);
|
||||
log.info(message);
|
||||
} else {
|
||||
|
@ -7,6 +7,7 @@ const supervisorNetworkInterface = 'supervisor0';
|
||||
|
||||
const constants = {
|
||||
rootMountPoint,
|
||||
stateMountPoint: '/mnt/state',
|
||||
databasePath:
|
||||
checkString(process.env.DATABASE_PATH) || '/data/database.sqlite',
|
||||
containerId: checkString(process.env.SUPERVISOR_CONTAINER_ID) || undefined,
|
||||
|
@ -5,8 +5,9 @@ import { promises as fs } from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as Lock from 'rwlock';
|
||||
|
||||
import { ENOENT, UpdatesLockedError } from './errors';
|
||||
import { getPathOnHost } from './fs-utils';
|
||||
import * as constants from './constants';
|
||||
import { ENOENT, EEXIST, UpdatesLockedError } from './errors';
|
||||
import { getPathOnHost, pathExistsOnHost } from './fs-utils';
|
||||
|
||||
type asyncLockFile = typeof lockFileLib & {
|
||||
unlockAsync(path: string): Bluebird<void>;
|
||||
@ -35,6 +36,29 @@ function lockFilesOnHost(appId: number, serviceName: string): string[] {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for rollback-{health|altboot}-breadcrumb, two files that exist while
|
||||
* rollback-{health|altboot}.service have not exited. If these files exist,
|
||||
* prevent reboot. If the Supervisor reboots while those services are still running,
|
||||
* the device may become stuck in an invalid state during HUP.
|
||||
*/
|
||||
export function ensureNoHUPBreadcrumbsOnHost(): Promise<boolean | never> {
|
||||
return Promise.all(
|
||||
[
|
||||
'rollback-health-breadcrumb',
|
||||
'rollback-altboot-breadcrumb',
|
||||
].map((filename) =>
|
||||
pathExistsOnHost(path.join(constants.stateMountPoint, filename)),
|
||||
),
|
||||
).then((existsArray) => {
|
||||
const anyExists = existsArray.some((exists) => exists);
|
||||
if (anyExists) {
|
||||
throw new UpdatesLockedError('Waiting for Host OS update to finish');
|
||||
}
|
||||
return anyExists;
|
||||
});
|
||||
}
|
||||
|
||||
const locksTaken: { [lockName: string]: boolean } = {};
|
||||
|
||||
// Try to clean up any existing locks when the program exits
|
||||
@ -66,6 +90,24 @@ function dispose(release: () => void): Bluebird<void> {
|
||||
.return();
|
||||
}
|
||||
|
||||
const lockExistsErrHandler = (err: Error, release: () => void) => {
|
||||
let errMsg = err.message;
|
||||
if (EEXIST(err)) {
|
||||
// Extract appId|appUuid and serviceName from lockfile path for log message
|
||||
// appId: [0-9]{7}, appUuid: [0-9a-w]{32}, short appUuid: [0-9a-w]{7}
|
||||
const pathMatch = err.message.match(
|
||||
/\/([0-9]{7}|[0-9a-w]{32}|[0-9a-w]{7})\/(.*)\/(?:resin-)?updates.lock/,
|
||||
);
|
||||
if (pathMatch && pathMatch.length === 3) {
|
||||
errMsg = `Lockfile exists for ${JSON.stringify({
|
||||
serviceName: pathMatch[2],
|
||||
[/^[0-9]{7}$/.test(pathMatch[1]) ? 'appId' : 'appUuid']: pathMatch[1],
|
||||
})}`;
|
||||
}
|
||||
}
|
||||
return dispose(release).throw(new UpdatesLockedError(errMsg));
|
||||
};
|
||||
|
||||
/**
|
||||
* Try to take the locks for an application. If force is set, it will remove
|
||||
* all existing lockfiles before performing the operation
|
||||
@ -100,12 +142,9 @@ export function lock(
|
||||
})
|
||||
.catchReturn(ENOENT, undefined);
|
||||
},
|
||||
).catch((err) => {
|
||||
return dispose(release).throw(
|
||||
new UpdatesLockedError(`Updates are locked: ${err.message}`),
|
||||
);
|
||||
});
|
||||
});
|
||||
);
|
||||
})
|
||||
.catch((err) => lockExistsErrHandler(err, release));
|
||||
})
|
||||
.disposer(dispose);
|
||||
};
|
||||
|
@ -1,8 +1,8 @@
|
||||
import * as _ from 'lodash';
|
||||
import { stub } from 'sinon';
|
||||
import { SinonStub, stub } from 'sinon';
|
||||
import { expect } from 'chai';
|
||||
|
||||
import { StatusCodeError } from '../src/lib/errors';
|
||||
import { StatusCodeError, UpdatesLockedError } from '../src/lib/errors';
|
||||
import prepare = require('./lib/prepare');
|
||||
import * as dockerUtils from '../src/lib/docker-utils';
|
||||
import * as config from '../src/config';
|
||||
@ -13,6 +13,7 @@ import * as deviceConfig from '../src/device-config';
|
||||
import { loadTargetFromFile } from '../src/device-state/preload';
|
||||
import Service from '../src/compose/service';
|
||||
import { intialiseContractRequirements } from '../src/lib/contracts';
|
||||
import * as updateLock from '../src/lib/update-lock';
|
||||
|
||||
const mockedInitialConfig = {
|
||||
RESIN_SUPERVISOR_CONNECTIVITY_CHECK: 'true',
|
||||
@ -355,4 +356,20 @@ describe('deviceState', () => {
|
||||
it.skip('applies the target state for device config');
|
||||
|
||||
it.skip('applies the target state for applications');
|
||||
|
||||
it('prevents reboot or shutdown when HUP rollback breadcrumbs are present', async () => {
|
||||
const testErrMsg = 'Waiting for Host OS updates to finish';
|
||||
stub(updateLock, 'ensureNoHUPBreadcrumbsOnHost').throws(
|
||||
new UpdatesLockedError(testErrMsg),
|
||||
);
|
||||
|
||||
await expect(deviceState.reboot())
|
||||
.to.eventually.be.rejectedWith(testErrMsg)
|
||||
.and.be.an.instanceOf(UpdatesLockedError);
|
||||
await expect(deviceState.shutdown())
|
||||
.to.eventually.be.rejectedWith(testErrMsg)
|
||||
.and.be.an.instanceOf(UpdatesLockedError);
|
||||
|
||||
(updateLock.ensureNoHUPBreadcrumbsOnHost as SinonStub).restore();
|
||||
});
|
||||
});
|
||||
|
251
test/src/lib/update-lock.spec.ts
Normal file
251
test/src/lib/update-lock.spec.ts
Normal file
@ -0,0 +1,251 @@
|
||||
import { expect } from 'chai';
|
||||
import { SinonSpy, SinonStub, spy, stub } from 'sinon';
|
||||
import * as path from 'path';
|
||||
import * as Bluebird from 'bluebird';
|
||||
|
||||
import rewire = require('rewire');
|
||||
import mockFs = require('mock-fs');
|
||||
|
||||
import * as constants from '../../../src/lib/constants';
|
||||
import { UpdatesLockedError } from '../../../src/lib/errors';
|
||||
|
||||
describe('lib/update-lock', () => {
|
||||
const updateLock = rewire('../../../src/lib/update-lock');
|
||||
const breadcrumbFiles = [
|
||||
'rollback-health-breadcrumb',
|
||||
'rollback-altboot-breadcrumb',
|
||||
];
|
||||
|
||||
const mockBreadcrumbs = (breadcrumb?: string) => {
|
||||
mockFs({
|
||||
[path.join(
|
||||
constants.rootMountPoint,
|
||||
constants.stateMountPoint,
|
||||
breadcrumb ? breadcrumb : '',
|
||||
)]: '',
|
||||
});
|
||||
};
|
||||
|
||||
const mockLockDir = ({
|
||||
appId,
|
||||
service,
|
||||
createLockfile = true,
|
||||
}: {
|
||||
appId: number;
|
||||
service: string;
|
||||
createLockfile?: boolean;
|
||||
}) => {
|
||||
mockFs({
|
||||
[path.join(
|
||||
constants.rootMountPoint,
|
||||
updateLock.lockPath(appId),
|
||||
service,
|
||||
)]: {
|
||||
[createLockfile ? 'updates.lock' : 'ignore-this.lock']: '',
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
// TODO: Remove these hooks when we don't need './test/data' as test process's rootMountPoint
|
||||
before(() => {
|
||||
// @ts-ignore // Set rootMountPoint for mockFs
|
||||
constants.rootMountPoint = '/mnt/root';
|
||||
});
|
||||
|
||||
after(() => {
|
||||
// @ts-ignore
|
||||
constants.rootMountPoint = process.env.ROOT_MOUNTPOINT;
|
||||
});
|
||||
|
||||
describe('Lockfile path methods', () => {
|
||||
const testAppId = 1234567;
|
||||
const testService = 'test';
|
||||
|
||||
it('should return path prefix of service lockfiles on host', () => {
|
||||
expect(updateLock.lockPath(testAppId)).to.equal(
|
||||
`/tmp/balena-supervisor/services/${testAppId}`,
|
||||
);
|
||||
expect(updateLock.lockPath(testAppId, testService)).to.equal(
|
||||
`/tmp/balena-supervisor/services/${testAppId}/${testService}`,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return the complete paths of (non-)legacy lockfiles on host', () => {
|
||||
const lockFilesOnHost = updateLock.__get__('lockFilesOnHost');
|
||||
expect(lockFilesOnHost(testAppId, testService)).to.deep.equal([
|
||||
`${constants.rootMountPoint}/tmp/balena-supervisor/services/${testAppId}/${testService}/updates.lock`,
|
||||
`${constants.rootMountPoint}/tmp/balena-supervisor/services/${testAppId}/${testService}/resin-updates.lock`,
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('ensureNoHUPBreadcrumbsOnHost', () => {
|
||||
afterEach(() => mockFs.restore());
|
||||
|
||||
it('should throw if any breadcrumbs exist on host', async () => {
|
||||
for (const bc of breadcrumbFiles) {
|
||||
mockBreadcrumbs(bc);
|
||||
await expect(updateLock.ensureNoHUPBreadcrumbsOnHost())
|
||||
.to.eventually.be.rejectedWith('Waiting for Host OS update to finish')
|
||||
.and.be.an.instanceOf(UpdatesLockedError);
|
||||
}
|
||||
});
|
||||
|
||||
it('should resolve to true if no breadcrumbs on host', async () => {
|
||||
mockBreadcrumbs();
|
||||
await expect(
|
||||
updateLock.ensureNoHUPBreadcrumbsOnHost(),
|
||||
).to.eventually.equal(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Lock/dispose functionality', () => {
|
||||
const lockFile = updateLock.__get__('lockFile');
|
||||
const locksTaken = updateLock.__get__('locksTaken');
|
||||
const dispose = updateLock.__get__('dispose');
|
||||
const lockExistsErrHandler = updateLock.__get__('lockExistsErrHandler');
|
||||
|
||||
const releaseFn = stub();
|
||||
const testLockPaths = ['/tmp/test/1', '/tmp/test/2'];
|
||||
|
||||
let unlockSyncStub: SinonStub;
|
||||
let unlockAsyncSpy: SinonSpy;
|
||||
let lockAsyncSpy: SinonSpy;
|
||||
|
||||
beforeEach(() => {
|
||||
// @ts-ignore
|
||||
unlockSyncStub = stub(lockFile, 'unlockSync').callsFake((lockPath) => {
|
||||
// Throw error on process.exit for one of the two lockpaths
|
||||
if (lockPath === testLockPaths[1]) {
|
||||
throw new Error(
|
||||
'handled unlockSync error which should not crash test process',
|
||||
);
|
||||
}
|
||||
});
|
||||
unlockAsyncSpy = spy(lockFile, 'unlockAsync');
|
||||
lockAsyncSpy = spy(lockFile, 'lockAsync');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
for (const key of Object.keys(locksTaken)) {
|
||||
delete locksTaken[key];
|
||||
}
|
||||
unlockSyncStub.restore();
|
||||
unlockAsyncSpy.restore();
|
||||
lockAsyncSpy.restore();
|
||||
});
|
||||
|
||||
it('should try to clean up existing locks on process exit', () => {
|
||||
testLockPaths.forEach((p) => (locksTaken[p] = true));
|
||||
|
||||
// @ts-ignore
|
||||
process.emit('exit');
|
||||
testLockPaths.forEach((p) => {
|
||||
expect(unlockSyncStub).to.have.been.calledWith(p);
|
||||
});
|
||||
});
|
||||
|
||||
it('should dispose of locks', async () => {
|
||||
for (const lock of testLockPaths) {
|
||||
locksTaken[lock] = true;
|
||||
}
|
||||
|
||||
await dispose(releaseFn);
|
||||
|
||||
expect(locksTaken).to.deep.equal({});
|
||||
expect(releaseFn).to.have.been.called;
|
||||
testLockPaths.forEach((p) => {
|
||||
expect(unlockAsyncSpy).to.have.been.calledWith(p);
|
||||
});
|
||||
});
|
||||
|
||||
describe('lockExistsErrHandler', () => {
|
||||
it('should handle EEXIST', async () => {
|
||||
const appIdentifiers = [
|
||||
{ id: '1234567', service: 'test1', type: 'appId' },
|
||||
{
|
||||
id: 'c89a7cb83d974518479591ffaf7c2417',
|
||||
service: 'test2',
|
||||
type: 'appUuid',
|
||||
},
|
||||
{ id: 'c89a7cb', service: 'test3', type: 'appUuid' },
|
||||
];
|
||||
for (const { id, service, type } of appIdentifiers) {
|
||||
// Handle legacy & nonlegacy lockfile names
|
||||
for (const lockfile of ['updates.lock', 'resin-updates.lock']) {
|
||||
const error = {
|
||||
code: 'EEXIST',
|
||||
message: `EEXIST: open "/tmp/balena-supervisor/services/${id}/${service}/${lockfile}"`,
|
||||
};
|
||||
await expect(lockExistsErrHandler(error, releaseFn))
|
||||
.to.eventually.be.rejectedWith(
|
||||
`Lockfile exists for ${JSON.stringify({
|
||||
serviceName: service,
|
||||
[type]: id,
|
||||
})}`,
|
||||
)
|
||||
.and.be.an.instanceOf(UpdatesLockedError);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('should handle any other errors', async () => {
|
||||
await expect(lockExistsErrHandler(new Error('Test error'), releaseFn))
|
||||
.to.eventually.be.rejectedWith('Test error')
|
||||
.and.be.an.instanceOf(UpdatesLockedError);
|
||||
});
|
||||
});
|
||||
|
||||
describe('lock', () => {
|
||||
let bluebirdUsing: SinonSpy;
|
||||
let bluebirdResolve: SinonSpy;
|
||||
const lockParamFn = stub().resolves();
|
||||
|
||||
beforeEach(() => {
|
||||
bluebirdUsing = spy(Bluebird, 'using');
|
||||
bluebirdResolve = spy(Bluebird, 'resolve');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
bluebirdUsing.restore();
|
||||
bluebirdResolve.restore();
|
||||
|
||||
mockFs.restore();
|
||||
});
|
||||
|
||||
it('resolves input function without dispose pattern when appId is null', async () => {
|
||||
mockLockDir({ appId: 1234567, service: 'test', createLockfile: true });
|
||||
await expect(updateLock.lock(null, { force: false }, lockParamFn)).to.be
|
||||
.fulfilled;
|
||||
expect(bluebirdResolve).to.have.been.called;
|
||||
});
|
||||
|
||||
it('resolves input function without dispose pattern when no lockfiles exist', async () => {
|
||||
mockLockDir({ appId: 1234567, service: 'test', createLockfile: false });
|
||||
await expect(updateLock.lock(1234567, { force: false }, lockParamFn)).to
|
||||
.be.fulfilled;
|
||||
expect(bluebirdResolve).to.have.been.called;
|
||||
});
|
||||
|
||||
it('uses dispose pattern if lockfile present and throws error', async () => {
|
||||
mockLockDir({ appId: 1234567, service: 'test' });
|
||||
await expect(updateLock.lock(1234567, { force: false }, lockParamFn))
|
||||
.to.eventually.be.rejectedWith(
|
||||
'Lockfile exists for {"serviceName":"test","appId":"1234567"}',
|
||||
)
|
||||
.and.be.an.instanceOf(UpdatesLockedError);
|
||||
expect(lockAsyncSpy).to.have.been.called;
|
||||
expect(bluebirdUsing).to.have.been.called;
|
||||
});
|
||||
|
||||
it('unlocks lockfile to resolve function if force option specified', async () => {
|
||||
mockLockDir({ appId: 1234567, service: 'test' });
|
||||
await expect(updateLock.lock(1234567, { force: true }, lockParamFn)).to
|
||||
.be.fulfilled;
|
||||
expect(unlockAsyncSpy).to.have.been.called;
|
||||
expect(lockAsyncSpy).to.have.been.called;
|
||||
expect(bluebirdUsing).to.have.been.called;
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user