Prevent updates/reboots with locks when HUP breadcrumbs present

On HUP, some healthceck services need to complete before
it's safe for the Supervisor to reboot the device when
applying state changes. rollback-{health|altboot}-breadcrumb
are the two files that Supervisor looks for and locks the device
on when present in this patch.

Not closing issue 1459 because there is a possible case where,
on altboot rollback, the breadcrumbs are not present. 1459
may be closed when this edge case is investigated.

Change-type: patch
Connects-to: #1459
See: https://www.flowdock.com/app/rulemotion/r-supervisor/threads/cL7YfNOLSfTPfw05h59GEW0kfOt
Signed-off-by: Christina Wang <christina@balena.io>
This commit is contained in:
Christina Wang 2021-06-14 19:10:16 +09:00
parent 5223262557
commit a9028e58ec
No known key found for this signature in database
GPG Key ID: 7C3ED0230F440835
5 changed files with 321 additions and 11 deletions

View File

@ -638,6 +638,7 @@ export function reportCurrentState(
}
export async function reboot(force?: boolean, skipLock?: boolean) {
await updateLock.ensureNoHUPBreadcrumbsOnHost();
await applicationManager.stopAll({ force, skipLock });
logger.logSystemMessage('Rebooting', {}, 'Reboot');
const $reboot = await dbus.reboot();
@ -647,6 +648,7 @@ export async function reboot(force?: boolean, skipLock?: boolean) {
}
export async function shutdown(force?: boolean, skipLock?: boolean) {
await updateLock.ensureNoHUPBreadcrumbsOnHost();
await applicationManager.stopAll({ force, skipLock });
logger.logSystemMessage('Shutting down', {}, 'Shutdown');
const $shutdown = await dbus.shutdown();
@ -757,7 +759,7 @@ function applyError(
if (err instanceof UpdatesLockedError) {
const message = `Updates are locked, retrying in ${prettyMs(delay, {
compact: true,
})}...`;
})}. Reason: ${err.message}`;
logger.logSystemMessage(message, {}, 'updateLocked', false);
log.info(message);
} else {

View File

@ -7,6 +7,7 @@ const supervisorNetworkInterface = 'supervisor0';
const constants = {
rootMountPoint,
stateMountPoint: '/mnt/state',
databasePath:
checkString(process.env.DATABASE_PATH) || '/data/database.sqlite',
containerId: checkString(process.env.SUPERVISOR_CONTAINER_ID) || undefined,

View File

@ -5,8 +5,9 @@ import { promises as fs } from 'fs';
import * as path from 'path';
import * as Lock from 'rwlock';
import { ENOENT, UpdatesLockedError } from './errors';
import { getPathOnHost } from './fs-utils';
import * as constants from './constants';
import { ENOENT, EEXIST, UpdatesLockedError } from './errors';
import { getPathOnHost, pathExistsOnHost } from './fs-utils';
type asyncLockFile = typeof lockFileLib & {
unlockAsync(path: string): Bluebird<void>;
@ -35,6 +36,29 @@ function lockFilesOnHost(appId: number, serviceName: string): string[] {
);
}
/**
* Check for rollback-{health|altboot}-breadcrumb, two files that exist while
* rollback-{health|altboot}.service have not exited. If these files exist,
* prevent reboot. If the Supervisor reboots while those services are still running,
* the device may become stuck in an invalid state during HUP.
*/
export function ensureNoHUPBreadcrumbsOnHost(): Promise<boolean | never> {
return Promise.all(
[
'rollback-health-breadcrumb',
'rollback-altboot-breadcrumb',
].map((filename) =>
pathExistsOnHost(path.join(constants.stateMountPoint, filename)),
),
).then((existsArray) => {
const anyExists = existsArray.some((exists) => exists);
if (anyExists) {
throw new UpdatesLockedError('Waiting for Host OS update to finish');
}
return anyExists;
});
}
const locksTaken: { [lockName: string]: boolean } = {};
// Try to clean up any existing locks when the program exits
@ -66,6 +90,24 @@ function dispose(release: () => void): Bluebird<void> {
.return();
}
const lockExistsErrHandler = (err: Error, release: () => void) => {
let errMsg = err.message;
if (EEXIST(err)) {
// Extract appId|appUuid and serviceName from lockfile path for log message
// appId: [0-9]{7}, appUuid: [0-9a-w]{32}, short appUuid: [0-9a-w]{7}
const pathMatch = err.message.match(
/\/([0-9]{7}|[0-9a-w]{32}|[0-9a-w]{7})\/(.*)\/(?:resin-)?updates.lock/,
);
if (pathMatch && pathMatch.length === 3) {
errMsg = `Lockfile exists for ${JSON.stringify({
serviceName: pathMatch[2],
[/^[0-9]{7}$/.test(pathMatch[1]) ? 'appId' : 'appUuid']: pathMatch[1],
})}`;
}
}
return dispose(release).throw(new UpdatesLockedError(errMsg));
};
/**
* Try to take the locks for an application. If force is set, it will remove
* all existing lockfiles before performing the operation
@ -100,12 +142,9 @@ export function lock(
})
.catchReturn(ENOENT, undefined);
},
).catch((err) => {
return dispose(release).throw(
new UpdatesLockedError(`Updates are locked: ${err.message}`),
);
});
});
);
})
.catch((err) => lockExistsErrHandler(err, release));
})
.disposer(dispose);
};

View File

@ -1,8 +1,8 @@
import * as _ from 'lodash';
import { stub } from 'sinon';
import { SinonStub, stub } from 'sinon';
import { expect } from 'chai';
import { StatusCodeError } from '../src/lib/errors';
import { StatusCodeError, UpdatesLockedError } from '../src/lib/errors';
import prepare = require('./lib/prepare');
import * as dockerUtils from '../src/lib/docker-utils';
import * as config from '../src/config';
@ -13,6 +13,7 @@ import * as deviceConfig from '../src/device-config';
import { loadTargetFromFile } from '../src/device-state/preload';
import Service from '../src/compose/service';
import { intialiseContractRequirements } from '../src/lib/contracts';
import * as updateLock from '../src/lib/update-lock';
const mockedInitialConfig = {
RESIN_SUPERVISOR_CONNECTIVITY_CHECK: 'true',
@ -355,4 +356,20 @@ describe('deviceState', () => {
it.skip('applies the target state for device config');
it.skip('applies the target state for applications');
it('prevents reboot or shutdown when HUP rollback breadcrumbs are present', async () => {
const testErrMsg = 'Waiting for Host OS updates to finish';
stub(updateLock, 'ensureNoHUPBreadcrumbsOnHost').throws(
new UpdatesLockedError(testErrMsg),
);
await expect(deviceState.reboot())
.to.eventually.be.rejectedWith(testErrMsg)
.and.be.an.instanceOf(UpdatesLockedError);
await expect(deviceState.shutdown())
.to.eventually.be.rejectedWith(testErrMsg)
.and.be.an.instanceOf(UpdatesLockedError);
(updateLock.ensureNoHUPBreadcrumbsOnHost as SinonStub).restore();
});
});

View File

@ -0,0 +1,251 @@
import { expect } from 'chai';
import { SinonSpy, SinonStub, spy, stub } from 'sinon';
import * as path from 'path';
import * as Bluebird from 'bluebird';
import rewire = require('rewire');
import mockFs = require('mock-fs');
import * as constants from '../../../src/lib/constants';
import { UpdatesLockedError } from '../../../src/lib/errors';
describe('lib/update-lock', () => {
const updateLock = rewire('../../../src/lib/update-lock');
const breadcrumbFiles = [
'rollback-health-breadcrumb',
'rollback-altboot-breadcrumb',
];
const mockBreadcrumbs = (breadcrumb?: string) => {
mockFs({
[path.join(
constants.rootMountPoint,
constants.stateMountPoint,
breadcrumb ? breadcrumb : '',
)]: '',
});
};
const mockLockDir = ({
appId,
service,
createLockfile = true,
}: {
appId: number;
service: string;
createLockfile?: boolean;
}) => {
mockFs({
[path.join(
constants.rootMountPoint,
updateLock.lockPath(appId),
service,
)]: {
[createLockfile ? 'updates.lock' : 'ignore-this.lock']: '',
},
});
};
// TODO: Remove these hooks when we don't need './test/data' as test process's rootMountPoint
before(() => {
// @ts-ignore // Set rootMountPoint for mockFs
constants.rootMountPoint = '/mnt/root';
});
after(() => {
// @ts-ignore
constants.rootMountPoint = process.env.ROOT_MOUNTPOINT;
});
describe('Lockfile path methods', () => {
const testAppId = 1234567;
const testService = 'test';
it('should return path prefix of service lockfiles on host', () => {
expect(updateLock.lockPath(testAppId)).to.equal(
`/tmp/balena-supervisor/services/${testAppId}`,
);
expect(updateLock.lockPath(testAppId, testService)).to.equal(
`/tmp/balena-supervisor/services/${testAppId}/${testService}`,
);
});
it('should return the complete paths of (non-)legacy lockfiles on host', () => {
const lockFilesOnHost = updateLock.__get__('lockFilesOnHost');
expect(lockFilesOnHost(testAppId, testService)).to.deep.equal([
`${constants.rootMountPoint}/tmp/balena-supervisor/services/${testAppId}/${testService}/updates.lock`,
`${constants.rootMountPoint}/tmp/balena-supervisor/services/${testAppId}/${testService}/resin-updates.lock`,
]);
});
});
describe('ensureNoHUPBreadcrumbsOnHost', () => {
afterEach(() => mockFs.restore());
it('should throw if any breadcrumbs exist on host', async () => {
for (const bc of breadcrumbFiles) {
mockBreadcrumbs(bc);
await expect(updateLock.ensureNoHUPBreadcrumbsOnHost())
.to.eventually.be.rejectedWith('Waiting for Host OS update to finish')
.and.be.an.instanceOf(UpdatesLockedError);
}
});
it('should resolve to true if no breadcrumbs on host', async () => {
mockBreadcrumbs();
await expect(
updateLock.ensureNoHUPBreadcrumbsOnHost(),
).to.eventually.equal(false);
});
});
describe('Lock/dispose functionality', () => {
const lockFile = updateLock.__get__('lockFile');
const locksTaken = updateLock.__get__('locksTaken');
const dispose = updateLock.__get__('dispose');
const lockExistsErrHandler = updateLock.__get__('lockExistsErrHandler');
const releaseFn = stub();
const testLockPaths = ['/tmp/test/1', '/tmp/test/2'];
let unlockSyncStub: SinonStub;
let unlockAsyncSpy: SinonSpy;
let lockAsyncSpy: SinonSpy;
beforeEach(() => {
// @ts-ignore
unlockSyncStub = stub(lockFile, 'unlockSync').callsFake((lockPath) => {
// Throw error on process.exit for one of the two lockpaths
if (lockPath === testLockPaths[1]) {
throw new Error(
'handled unlockSync error which should not crash test process',
);
}
});
unlockAsyncSpy = spy(lockFile, 'unlockAsync');
lockAsyncSpy = spy(lockFile, 'lockAsync');
});
afterEach(() => {
for (const key of Object.keys(locksTaken)) {
delete locksTaken[key];
}
unlockSyncStub.restore();
unlockAsyncSpy.restore();
lockAsyncSpy.restore();
});
it('should try to clean up existing locks on process exit', () => {
testLockPaths.forEach((p) => (locksTaken[p] = true));
// @ts-ignore
process.emit('exit');
testLockPaths.forEach((p) => {
expect(unlockSyncStub).to.have.been.calledWith(p);
});
});
it('should dispose of locks', async () => {
for (const lock of testLockPaths) {
locksTaken[lock] = true;
}
await dispose(releaseFn);
expect(locksTaken).to.deep.equal({});
expect(releaseFn).to.have.been.called;
testLockPaths.forEach((p) => {
expect(unlockAsyncSpy).to.have.been.calledWith(p);
});
});
describe('lockExistsErrHandler', () => {
it('should handle EEXIST', async () => {
const appIdentifiers = [
{ id: '1234567', service: 'test1', type: 'appId' },
{
id: 'c89a7cb83d974518479591ffaf7c2417',
service: 'test2',
type: 'appUuid',
},
{ id: 'c89a7cb', service: 'test3', type: 'appUuid' },
];
for (const { id, service, type } of appIdentifiers) {
// Handle legacy & nonlegacy lockfile names
for (const lockfile of ['updates.lock', 'resin-updates.lock']) {
const error = {
code: 'EEXIST',
message: `EEXIST: open "/tmp/balena-supervisor/services/${id}/${service}/${lockfile}"`,
};
await expect(lockExistsErrHandler(error, releaseFn))
.to.eventually.be.rejectedWith(
`Lockfile exists for ${JSON.stringify({
serviceName: service,
[type]: id,
})}`,
)
.and.be.an.instanceOf(UpdatesLockedError);
}
}
});
it('should handle any other errors', async () => {
await expect(lockExistsErrHandler(new Error('Test error'), releaseFn))
.to.eventually.be.rejectedWith('Test error')
.and.be.an.instanceOf(UpdatesLockedError);
});
});
describe('lock', () => {
let bluebirdUsing: SinonSpy;
let bluebirdResolve: SinonSpy;
const lockParamFn = stub().resolves();
beforeEach(() => {
bluebirdUsing = spy(Bluebird, 'using');
bluebirdResolve = spy(Bluebird, 'resolve');
});
afterEach(() => {
bluebirdUsing.restore();
bluebirdResolve.restore();
mockFs.restore();
});
it('resolves input function without dispose pattern when appId is null', async () => {
mockLockDir({ appId: 1234567, service: 'test', createLockfile: true });
await expect(updateLock.lock(null, { force: false }, lockParamFn)).to.be
.fulfilled;
expect(bluebirdResolve).to.have.been.called;
});
it('resolves input function without dispose pattern when no lockfiles exist', async () => {
mockLockDir({ appId: 1234567, service: 'test', createLockfile: false });
await expect(updateLock.lock(1234567, { force: false }, lockParamFn)).to
.be.fulfilled;
expect(bluebirdResolve).to.have.been.called;
});
it('uses dispose pattern if lockfile present and throws error', async () => {
mockLockDir({ appId: 1234567, service: 'test' });
await expect(updateLock.lock(1234567, { force: false }, lockParamFn))
.to.eventually.be.rejectedWith(
'Lockfile exists for {"serviceName":"test","appId":"1234567"}',
)
.and.be.an.instanceOf(UpdatesLockedError);
expect(lockAsyncSpy).to.have.been.called;
expect(bluebirdUsing).to.have.been.called;
});
it('unlocks lockfile to resolve function if force option specified', async () => {
mockLockDir({ appId: 1234567, service: 'test' });
await expect(updateLock.lock(1234567, { force: true }, lockParamFn)).to
.be.fulfilled;
expect(unlockAsyncSpy).to.have.been.called;
expect(lockAsyncSpy).to.have.been.called;
expect(bluebirdUsing).to.have.been.called;
});
});
});
});