mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-01-20 19:49:01 +00:00
Clean up remaining locks on state settle
Locks could remain from a previous supervisor run that didn't get to settle the state. This ensures that cleanup will happen for remaining locks every time the state is settled. Change-type: patch
This commit is contained in:
parent
3c6e9dd209
commit
9c09329b86
@ -201,9 +201,8 @@ class AppImpl implements App {
|
||||
);
|
||||
}
|
||||
|
||||
// If the app still has a lock, release it
|
||||
if (state.lock != null) {
|
||||
// Release locks for all services before settling state
|
||||
// Release locks (if any) for all services before settling state
|
||||
if (state.lock || state.hasLeftoverLocks) {
|
||||
steps.push(
|
||||
generateStep('releaseLock', {
|
||||
appId: target.appId,
|
||||
|
@ -13,6 +13,7 @@ import * as constants from '../lib/constants';
|
||||
import log from '../lib/supervisor-console';
|
||||
import { InternalInconsistencyError } from '../lib/errors';
|
||||
import type { Lock } from '../lib/update-lock';
|
||||
import { hasLeftoverLocks } from '../lib/update-lock';
|
||||
import { checkTruthy } from '../lib/validation';
|
||||
|
||||
import { App } from './app';
|
||||
@ -180,6 +181,10 @@ export async function inferNextSteps(
|
||||
const currentAppIds = Object.keys(currentApps).map((i) => parseInt(i, 10));
|
||||
const targetAppIds = Object.keys(targetApps).map((i) => parseInt(i, 10));
|
||||
|
||||
const withLeftoverLocks = await Promise.all(
|
||||
currentAppIds.map((id) => hasLeftoverLocks(id)),
|
||||
);
|
||||
|
||||
let steps: CompositionStep[] = [];
|
||||
|
||||
// First check if we need to create the supervisor network
|
||||
@ -239,6 +244,7 @@ export async function inferNextSteps(
|
||||
downloading,
|
||||
force,
|
||||
lock: appLocks[id],
|
||||
hasLeftoverLocks: withLeftoverLocks[id],
|
||||
},
|
||||
targetApps[id],
|
||||
),
|
||||
@ -254,6 +260,7 @@ export async function inferNextSteps(
|
||||
containerIds: containerIdsByAppId[id],
|
||||
force,
|
||||
lock: appLocks[id],
|
||||
hasLeftoverLocks: withLeftoverLocks[id],
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -279,6 +286,7 @@ export async function inferNextSteps(
|
||||
downloading,
|
||||
force,
|
||||
lock: appLocks[id],
|
||||
hasLeftoverLocks: false,
|
||||
},
|
||||
targetApps[id],
|
||||
),
|
||||
|
@ -5,7 +5,7 @@ import * as serviceManager from './service-manager';
|
||||
import * as networkManager from './network-manager';
|
||||
import * as volumeManager from './volume-manager';
|
||||
import * as commitStore from './commit';
|
||||
import { Lockable } from '../lib/update-lock';
|
||||
import { Lockable, cleanLocksForApp } from '../lib/update-lock';
|
||||
import type { DeviceLegacyReport } from '../types/state';
|
||||
import type { CompositionStepAction, CompositionStepT } from './types';
|
||||
import type { Lock } from '../lib/update-lock';
|
||||
@ -151,7 +151,11 @@ export function getExecutors(app: { callbacks: CompositionCallbacks }) {
|
||||
},
|
||||
releaseLock: async (step) => {
|
||||
app.callbacks.unregisterLock(step.appId);
|
||||
await step.lock.unlock();
|
||||
if (step.lock != null) {
|
||||
await step.lock.unlock();
|
||||
}
|
||||
// Clean up any remaining locks
|
||||
await cleanLocksForApp(step.appId);
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -9,6 +9,7 @@ export interface UpdateState {
|
||||
availableImages: Image[];
|
||||
containerIds: Dictionary<string>;
|
||||
downloading: string[];
|
||||
hasLeftoverLocks: boolean;
|
||||
lock: Lock | null;
|
||||
force: boolean;
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ export interface CompositionStepArgs {
|
||||
};
|
||||
releaseLock: {
|
||||
appId: string | number;
|
||||
lock: Lock;
|
||||
lock: Lock | null;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -158,11 +158,8 @@ function newLockable(appId: string, services: string[]): Lockable {
|
||||
// Find all the locks already taken for the appId
|
||||
// if this is not empty it probably means these locks are from
|
||||
// a previous run of the supervisor
|
||||
currentLocks = await lockfile.findAll({
|
||||
root: pathOnRoot(lockPath(appId)),
|
||||
filter: (l) =>
|
||||
l.path.endsWith('updates.lock') && l.owner === LOCKFILE_UID,
|
||||
});
|
||||
|
||||
currentLocks = await leftoverLocks(appId);
|
||||
|
||||
// Group locks by service
|
||||
const locksByService = services.map((service) => {
|
||||
@ -329,3 +326,45 @@ export async function withLock<T>(
|
||||
await Promise.all(locks.map((l) => l.unlock()));
|
||||
}
|
||||
}
|
||||
|
||||
async function leftoverLocks(appId: string | number) {
|
||||
// Find all the locks for the appId that are owned by the supervisor
|
||||
return await lockfile.findAll({
|
||||
root: pathOnRoot(lockPath(appId)),
|
||||
filter: (l) => l.path.endsWith('updates.lock') && l.owner === LOCKFILE_UID,
|
||||
});
|
||||
}
|
||||
|
||||
export async function hasLeftoverLocks(appId: string | number) {
|
||||
const leftover = await leftoverLocks(appId);
|
||||
return leftover.length > 0;
|
||||
}
|
||||
|
||||
export async function cleanLocksForApp(
|
||||
appId: string | number,
|
||||
): Promise<boolean> {
|
||||
let disposer: ReadWriteLock.Release = () => {
|
||||
/* noop */
|
||||
};
|
||||
try {
|
||||
// Take the lock for the app to avoid removing locks used somewhere
|
||||
// else. Wait at most 10ms. If the process lock is taken elsewhere
|
||||
// it is expected that cleanup will happen after it is released anyway
|
||||
disposer = await takeGlobalLockOrFail(appId.toString(), 10);
|
||||
|
||||
// Find all the locks for the appId that are owned by the supervisor
|
||||
const currentLocks = await leftoverLocks(appId);
|
||||
|
||||
// Remove any remaining locks
|
||||
await Promise.all(currentLocks.map((l) => fs.rm(l)));
|
||||
return true;
|
||||
} catch (e) {
|
||||
if (e instanceof UpdatesLockedError) {
|
||||
// Ignore locking errors when trying to take the global lock
|
||||
return false;
|
||||
}
|
||||
throw e;
|
||||
} finally {
|
||||
disposer();
|
||||
}
|
||||
}
|
||||
|
@ -425,4 +425,93 @@ describe('lib/update-lock', () => {
|
||||
await expectLocks(123, ['one', 'two'], false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cleanLocksForApp', () => {
|
||||
afterEach(async () => {
|
||||
await fs.rm(pathOnRoot('/tmp/balena-supervisor/services/123'), {
|
||||
recursive: true,
|
||||
force: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('removes remaining supervisor locks and ignores user locks', async () => {
|
||||
const userLock = path.join(lockdir(123, 'one'), 'updates.lock');
|
||||
const serviceLock = path.join(lockdir(123, 'two'), 'resin-updates.lock');
|
||||
const tmp = await testfs({
|
||||
[userLock]: testfs.file({ uid: 0 }),
|
||||
[serviceLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
|
||||
}).enable();
|
||||
|
||||
await updateLock.cleanLocksForApp(123);
|
||||
|
||||
// Supervisor locks should have been removed
|
||||
await expect(fs.readdir(lockdir(123, 'two'))).to.eventually.deep.equal(
|
||||
[],
|
||||
);
|
||||
await expect(fs.readdir(lockdir(123, 'one'))).to.eventually.deep.equal([
|
||||
'updates.lock',
|
||||
]);
|
||||
await tmp.restore();
|
||||
});
|
||||
|
||||
it('removes remaining supervisor locks and ignores user legacy locks', async () => {
|
||||
const userLock = path.join(lockdir(123, 'one'), 'resin-updates.lock');
|
||||
const serviceLock = path.join(lockdir(123, 'two'), 'updates.lock');
|
||||
const tmp = await testfs({
|
||||
[userLock]: testfs.file({ uid: 0 }),
|
||||
[serviceLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
|
||||
}).enable();
|
||||
|
||||
await updateLock.cleanLocksForApp(123);
|
||||
|
||||
// Supervisor locks should have been removed
|
||||
await expect(fs.readdir(lockdir(123, 'two'))).to.eventually.deep.equal(
|
||||
[],
|
||||
);
|
||||
await expect(fs.readdir(lockdir(123, 'one'))).to.eventually.deep.equal([
|
||||
'resin-updates.lock',
|
||||
]);
|
||||
await tmp.restore();
|
||||
});
|
||||
|
||||
it('ignores locks if taken somewhere else in the supervisor', async () => {
|
||||
// No locks before locking takes place
|
||||
await expectLocks(123, ['one', 'two'], false);
|
||||
|
||||
const lockable = Lockable.from(123, ['one', 'two']);
|
||||
const lock = await lockable.lock();
|
||||
|
||||
// Locks should exist now
|
||||
await expectLocks(123, ['one', 'two']);
|
||||
|
||||
// Clean locks should not remove anything
|
||||
await expect(updateLock.cleanLocksForApp(123)).to.eventually.equal(false);
|
||||
|
||||
// Locks should still exist
|
||||
await expectLocks(123, ['one', 'two']);
|
||||
|
||||
await lock.unlock();
|
||||
await expectLocks(123, ['one', 'two'], false);
|
||||
});
|
||||
|
||||
it('only removes locks for the given app', async () => {
|
||||
const otherLock = path.join(lockdir(456, 'main'), 'updates.lock');
|
||||
const serviceLock = path.join(lockdir(123, 'main'), 'resin-updates.lock');
|
||||
const tmp = await testfs({
|
||||
[otherLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
|
||||
[serviceLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
|
||||
}).enable();
|
||||
|
||||
await updateLock.cleanLocksForApp(123);
|
||||
|
||||
// Supervisor locks should have been removed
|
||||
await expect(fs.readdir(lockdir(123, 'main'))).to.eventually.deep.equal(
|
||||
[],
|
||||
);
|
||||
await expect(fs.readdir(lockdir(456, 'main'))).to.eventually.deep.equal([
|
||||
'updates.lock',
|
||||
]);
|
||||
await tmp.restore();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@ -2,7 +2,7 @@ import { expect } from 'chai';
|
||||
import type { Image } from '~/src/compose/images';
|
||||
import { Network } from '~/src/compose/network';
|
||||
import { Volume } from '~/src/compose/volume';
|
||||
import type { Lock } from '~/lib/update-lock';
|
||||
import { type Lock } from '~/lib/update-lock';
|
||||
|
||||
import {
|
||||
createService,
|
||||
@ -20,6 +20,7 @@ const defaultContext = {
|
||||
containerIds: {},
|
||||
downloading: [] as string[],
|
||||
lock: null,
|
||||
hasLeftoverLocks: false,
|
||||
};
|
||||
|
||||
const mockLock: Lock = {
|
||||
@ -318,6 +319,7 @@ describe('compose/app', () => {
|
||||
networks: [DEFAULT_NETWORK],
|
||||
volumes: [volume],
|
||||
});
|
||||
|
||||
expect(
|
||||
currentWithServiceRemoved.nextStepsForAppUpdate(
|
||||
contextWithImages,
|
||||
@ -2091,7 +2093,7 @@ describe('compose/app', () => {
|
||||
});
|
||||
|
||||
describe('update lock state behavior', () => {
|
||||
it('should infer a releaseLock step if there are locks to be released before settling target state', async () => {
|
||||
it('should not infer a releaseLock step if there are locks to be released before settling target state', async () => {
|
||||
const services = [
|
||||
await createService({ serviceName: 'server' }),
|
||||
await createService({ serviceName: 'client' }),
|
||||
@ -2128,6 +2130,29 @@ describe('compose/app', () => {
|
||||
expect(releaseLockStep2).to.have.property('appId').that.equals(1);
|
||||
});
|
||||
|
||||
it('should infer a releaseLock step if there are leftover locks', async () => {
|
||||
const services = [
|
||||
await createService({ serviceName: 'server' }),
|
||||
await createService({ serviceName: 'client' }),
|
||||
];
|
||||
const current = createApp({
|
||||
services,
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
const target = createApp({
|
||||
services,
|
||||
networks: [DEFAULT_NETWORK],
|
||||
isTarget: true,
|
||||
});
|
||||
|
||||
const steps = current.nextStepsForAppUpdate(
|
||||
{ ...defaultContext, hasLeftoverLocks: true },
|
||||
target,
|
||||
);
|
||||
expect(steps).to.have.length(1);
|
||||
expectSteps('releaseLock', steps);
|
||||
});
|
||||
|
||||
it('should not infer a releaseLock step if there are no locks to be released', async () => {
|
||||
const services = [
|
||||
await createService({ serviceName: 'server' }),
|
||||
|
Loading…
Reference in New Issue
Block a user