Clean up remaining locks on state settle

Locks could remain from a previous supervisor run that didn't get to
settle the state. This ensures that cleanup will happen for remaining
locks every time the state is settled.

Change-type: patch
This commit is contained in:
Felipe Lalanne 2024-11-21 14:22:26 -03:00
parent 3c6e9dd209
commit 9c09329b86
No known key found for this signature in database
GPG Key ID: 03E696BFD472B26A
8 changed files with 178 additions and 13 deletions

View File

@ -201,9 +201,8 @@ class AppImpl implements App {
);
}
// If the app still has a lock, release it
if (state.lock != null) {
// Release locks for all services before settling state
// Release locks (if any) for all services before settling state
if (state.lock || state.hasLeftoverLocks) {
steps.push(
generateStep('releaseLock', {
appId: target.appId,

View File

@ -13,6 +13,7 @@ import * as constants from '../lib/constants';
import log from '../lib/supervisor-console';
import { InternalInconsistencyError } from '../lib/errors';
import type { Lock } from '../lib/update-lock';
import { hasLeftoverLocks } from '../lib/update-lock';
import { checkTruthy } from '../lib/validation';
import { App } from './app';
@ -180,6 +181,10 @@ export async function inferNextSteps(
const currentAppIds = Object.keys(currentApps).map((i) => parseInt(i, 10));
const targetAppIds = Object.keys(targetApps).map((i) => parseInt(i, 10));
const withLeftoverLocks = await Promise.all(
currentAppIds.map((id) => hasLeftoverLocks(id)),
);
let steps: CompositionStep[] = [];
// First check if we need to create the supervisor network
@ -239,6 +244,7 @@ export async function inferNextSteps(
downloading,
force,
lock: appLocks[id],
hasLeftoverLocks: withLeftoverLocks[id],
},
targetApps[id],
),
@ -254,6 +260,7 @@ export async function inferNextSteps(
containerIds: containerIdsByAppId[id],
force,
lock: appLocks[id],
hasLeftoverLocks: withLeftoverLocks[id],
}),
);
}
@ -279,6 +286,7 @@ export async function inferNextSteps(
downloading,
force,
lock: appLocks[id],
hasLeftoverLocks: false,
},
targetApps[id],
),

View File

@ -5,7 +5,7 @@ import * as serviceManager from './service-manager';
import * as networkManager from './network-manager';
import * as volumeManager from './volume-manager';
import * as commitStore from './commit';
import { Lockable } from '../lib/update-lock';
import { Lockable, cleanLocksForApp } from '../lib/update-lock';
import type { DeviceLegacyReport } from '../types/state';
import type { CompositionStepAction, CompositionStepT } from './types';
import type { Lock } from '../lib/update-lock';
@ -151,7 +151,11 @@ export function getExecutors(app: { callbacks: CompositionCallbacks }) {
},
releaseLock: async (step) => {
app.callbacks.unregisterLock(step.appId);
await step.lock.unlock();
if (step.lock != null) {
await step.lock.unlock();
}
// Clean up any remaining locks
await cleanLocksForApp(step.appId);
},
};

View File

@ -9,6 +9,7 @@ export interface UpdateState {
availableImages: Image[];
containerIds: Dictionary<string>;
downloading: string[];
hasLeftoverLocks: boolean;
lock: Lock | null;
force: boolean;
}

View File

@ -74,7 +74,7 @@ export interface CompositionStepArgs {
};
releaseLock: {
appId: string | number;
lock: Lock;
lock: Lock | null;
};
}

View File

@ -158,11 +158,8 @@ function newLockable(appId: string, services: string[]): Lockable {
// Find all the locks already taken for the appId
// if this is not empty it probably means these locks are from
// a previous run of the supervisor
currentLocks = await lockfile.findAll({
root: pathOnRoot(lockPath(appId)),
filter: (l) =>
l.path.endsWith('updates.lock') && l.owner === LOCKFILE_UID,
});
currentLocks = await leftoverLocks(appId);
// Group locks by service
const locksByService = services.map((service) => {
@ -329,3 +326,45 @@ export async function withLock<T>(
await Promise.all(locks.map((l) => l.unlock()));
}
}
async function leftoverLocks(appId: string | number) {
// Find all the locks for the appId that are owned by the supervisor
return await lockfile.findAll({
root: pathOnRoot(lockPath(appId)),
filter: (l) => l.path.endsWith('updates.lock') && l.owner === LOCKFILE_UID,
});
}
export async function hasLeftoverLocks(appId: string | number) {
const leftover = await leftoverLocks(appId);
return leftover.length > 0;
}
export async function cleanLocksForApp(
appId: string | number,
): Promise<boolean> {
let disposer: ReadWriteLock.Release = () => {
/* noop */
};
try {
// Take the lock for the app to avoid removing locks used somewhere
// else. Wait at most 10ms. If the process lock is taken elsewhere
// it is expected that cleanup will happen after it is released anyway
disposer = await takeGlobalLockOrFail(appId.toString(), 10);
// Find all the locks for the appId that are owned by the supervisor
const currentLocks = await leftoverLocks(appId);
// Remove any remaining locks
await Promise.all(currentLocks.map((l) => fs.rm(l)));
return true;
} catch (e) {
if (e instanceof UpdatesLockedError) {
// Ignore locking errors when trying to take the global lock
return false;
}
throw e;
} finally {
disposer();
}
}

View File

@ -425,4 +425,93 @@ describe('lib/update-lock', () => {
await expectLocks(123, ['one', 'two'], false);
});
});
describe('cleanLocksForApp', () => {
afterEach(async () => {
await fs.rm(pathOnRoot('/tmp/balena-supervisor/services/123'), {
recursive: true,
force: true,
});
});
it('removes remaining supervisor locks and ignores user locks', async () => {
const userLock = path.join(lockdir(123, 'one'), 'updates.lock');
const serviceLock = path.join(lockdir(123, 'two'), 'resin-updates.lock');
const tmp = await testfs({
[userLock]: testfs.file({ uid: 0 }),
[serviceLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
}).enable();
await updateLock.cleanLocksForApp(123);
// Supervisor locks should have been removed
await expect(fs.readdir(lockdir(123, 'two'))).to.eventually.deep.equal(
[],
);
await expect(fs.readdir(lockdir(123, 'one'))).to.eventually.deep.equal([
'updates.lock',
]);
await tmp.restore();
});
it('removes remaining supervisor locks and ignores user legacy locks', async () => {
const userLock = path.join(lockdir(123, 'one'), 'resin-updates.lock');
const serviceLock = path.join(lockdir(123, 'two'), 'updates.lock');
const tmp = await testfs({
[userLock]: testfs.file({ uid: 0 }),
[serviceLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
}).enable();
await updateLock.cleanLocksForApp(123);
// Supervisor locks should have been removed
await expect(fs.readdir(lockdir(123, 'two'))).to.eventually.deep.equal(
[],
);
await expect(fs.readdir(lockdir(123, 'one'))).to.eventually.deep.equal([
'resin-updates.lock',
]);
await tmp.restore();
});
it('ignores locks if taken somewhere else in the supervisor', async () => {
// No locks before locking takes place
await expectLocks(123, ['one', 'two'], false);
const lockable = Lockable.from(123, ['one', 'two']);
const lock = await lockable.lock();
// Locks should exist now
await expectLocks(123, ['one', 'two']);
// Clean locks should not remove anything
await expect(updateLock.cleanLocksForApp(123)).to.eventually.equal(false);
// Locks should still exist
await expectLocks(123, ['one', 'two']);
await lock.unlock();
await expectLocks(123, ['one', 'two'], false);
});
it('only removes locks for the given app', async () => {
const otherLock = path.join(lockdir(456, 'main'), 'updates.lock');
const serviceLock = path.join(lockdir(123, 'main'), 'resin-updates.lock');
const tmp = await testfs({
[otherLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
[serviceLock]: testfs.file({ uid: updateLock.LOCKFILE_UID }),
}).enable();
await updateLock.cleanLocksForApp(123);
// Supervisor locks should have been removed
await expect(fs.readdir(lockdir(123, 'main'))).to.eventually.deep.equal(
[],
);
await expect(fs.readdir(lockdir(456, 'main'))).to.eventually.deep.equal([
'updates.lock',
]);
await tmp.restore();
});
});
});

View File

@ -2,7 +2,7 @@ import { expect } from 'chai';
import type { Image } from '~/src/compose/images';
import { Network } from '~/src/compose/network';
import { Volume } from '~/src/compose/volume';
import type { Lock } from '~/lib/update-lock';
import { type Lock } from '~/lib/update-lock';
import {
createService,
@ -20,6 +20,7 @@ const defaultContext = {
containerIds: {},
downloading: [] as string[],
lock: null,
hasLeftoverLocks: false,
};
const mockLock: Lock = {
@ -318,6 +319,7 @@ describe('compose/app', () => {
networks: [DEFAULT_NETWORK],
volumes: [volume],
});
expect(
currentWithServiceRemoved.nextStepsForAppUpdate(
contextWithImages,
@ -2091,7 +2093,7 @@ describe('compose/app', () => {
});
describe('update lock state behavior', () => {
it('should infer a releaseLock step if there are locks to be released before settling target state', async () => {
it('should not infer a releaseLock step if there are locks to be released before settling target state', async () => {
const services = [
await createService({ serviceName: 'server' }),
await createService({ serviceName: 'client' }),
@ -2128,6 +2130,29 @@ describe('compose/app', () => {
expect(releaseLockStep2).to.have.property('appId').that.equals(1);
});
it('should infer a releaseLock step if there are leftover locks', async () => {
const services = [
await createService({ serviceName: 'server' }),
await createService({ serviceName: 'client' }),
];
const current = createApp({
services,
networks: [DEFAULT_NETWORK],
});
const target = createApp({
services,
networks: [DEFAULT_NETWORK],
isTarget: true,
});
const steps = current.nextStepsForAppUpdate(
{ ...defaultContext, hasLeftoverLocks: true },
target,
);
expect(steps).to.have.length(1);
expectSteps('releaseLock', steps);
});
it('should not infer a releaseLock step if there are no locks to be released', async () => {
const services = [
await createService({ serviceName: 'server' }),