mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-02-20 17:52:51 +00:00
Merge pull request #1883 from balena-os/reduce-config-json-writes
Make the supervisor more resistant to restarts during config changes
This commit is contained in:
commit
1c82097d1d
@ -60,7 +60,7 @@ export const schemaTypes = {
|
||||
},
|
||||
appUpdatePollInterval: {
|
||||
type: PermissiveNumber,
|
||||
default: 60000,
|
||||
default: 900000,
|
||||
},
|
||||
instantUpdates: {
|
||||
type: PermissiveBoolean,
|
||||
|
@ -1,5 +1,6 @@
|
||||
import * as _ from 'lodash';
|
||||
import { inspect } from 'util';
|
||||
import { promises as fs } from 'fs';
|
||||
|
||||
import * as config from './config';
|
||||
import * as db from './db';
|
||||
@ -15,9 +16,19 @@ import { SchemaTypeKey } from './config/schema-type';
|
||||
import { matchesAnyBootConfig } from './config/backends';
|
||||
import { ConfigBackend } from './config/backends/backend';
|
||||
import { Odmdata } from './config/backends/odmdata';
|
||||
import * as fsUtils from './lib/fs-utils';
|
||||
|
||||
const vpnServiceName = 'openvpn';
|
||||
|
||||
// This indicates the file on the host /tmp directory that
|
||||
// marks the need for a reboot. Since reboot is only triggered for now
|
||||
// by some config changes, we leave this here for now. There is planned
|
||||
// functionality to allow image installs to require reboots, at that moment
|
||||
// this constant can be moved somewhere else
|
||||
const REBOOT_BREADCRUMB = fsUtils.getPathOnHost(
|
||||
'/tmp/balena-supervisor/reboot-after-apply',
|
||||
);
|
||||
|
||||
interface ConfigOption {
|
||||
envVarName: string;
|
||||
varType: string;
|
||||
@ -34,7 +45,6 @@ export interface ConfigStep {
|
||||
action: keyof DeviceActionExecutors | 'reboot' | 'noop';
|
||||
humanReadableTarget?: Dictionary<string>;
|
||||
target?: string | Dictionary<string>;
|
||||
rebootRequired?: boolean;
|
||||
}
|
||||
|
||||
interface DeviceActionExecutorOpts {
|
||||
@ -50,9 +60,9 @@ interface DeviceActionExecutors {
|
||||
changeConfig: DeviceActionExecutorFn;
|
||||
setVPNEnabled: DeviceActionExecutorFn;
|
||||
setBootConfig: DeviceActionExecutorFn;
|
||||
setRebootBreadcrumb: DeviceActionExecutorFn;
|
||||
}
|
||||
|
||||
let rebootRequired = false;
|
||||
const actionExecutors: DeviceActionExecutors = {
|
||||
changeConfig: async (step) => {
|
||||
try {
|
||||
@ -70,9 +80,6 @@ const actionExecutors: DeviceActionExecutors = {
|
||||
success: true,
|
||||
});
|
||||
}
|
||||
if (step.rebootRequired) {
|
||||
rebootRequired = true;
|
||||
}
|
||||
} catch (err) {
|
||||
if (step.humanReadableTarget) {
|
||||
logger.logConfigChange(step.humanReadableTarget, {
|
||||
@ -110,6 +117,11 @@ const actionExecutors: DeviceActionExecutors = {
|
||||
await setBootConfig(backend, step.target as Dictionary<string>);
|
||||
}
|
||||
},
|
||||
setRebootBreadcrumb: async () => {
|
||||
// Just create the file. The last step in the target state calculation will check
|
||||
// the file and create a reboot step
|
||||
await fsUtils.touch(REBOOT_BREADCRUMB);
|
||||
},
|
||||
};
|
||||
|
||||
const configBackends: ConfigBackend[] = [];
|
||||
@ -118,7 +130,7 @@ const configKeys: Dictionary<ConfigOption> = {
|
||||
appUpdatePollInterval: {
|
||||
envVarName: 'SUPERVISOR_POLL_INTERVAL',
|
||||
varType: 'int',
|
||||
defaultValue: '60000',
|
||||
defaultValue: '900000',
|
||||
},
|
||||
instantUpdates: {
|
||||
envVarName: 'SUPERVISOR_INSTANT_UPDATE_TRIGGER',
|
||||
@ -396,31 +408,14 @@ export function bootConfigChangeRequired(
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function getRequiredSteps(
|
||||
currentState: DeviceStatus,
|
||||
targetState: { local?: { config?: Dictionary<string> } },
|
||||
): Promise<ConfigStep[]> {
|
||||
const current: Dictionary<string> = _.get(
|
||||
currentState,
|
||||
['local', 'config'],
|
||||
{},
|
||||
);
|
||||
const target: Dictionary<string> = _.get(
|
||||
targetState,
|
||||
['local', 'config'],
|
||||
{},
|
||||
);
|
||||
|
||||
let steps: ConfigStep[] = [];
|
||||
|
||||
const { deviceType, unmanaged } = await config.getMany([
|
||||
'deviceType',
|
||||
'unmanaged',
|
||||
]);
|
||||
|
||||
function getConfigSteps(
|
||||
current: Dictionary<string>,
|
||||
target: Dictionary<string>,
|
||||
) {
|
||||
const configChanges: Dictionary<string> = {};
|
||||
const humanReadableConfigChanges: Dictionary<string> = {};
|
||||
let reboot = false;
|
||||
const steps: ConfigStep[] = [];
|
||||
|
||||
_.each(
|
||||
configKeys,
|
||||
@ -461,14 +456,28 @@ export async function getRequiredSteps(
|
||||
);
|
||||
|
||||
if (!_.isEmpty(configChanges)) {
|
||||
if (reboot) {
|
||||
steps.push({ action: 'setRebootBreadcrumb' });
|
||||
}
|
||||
|
||||
steps.push({
|
||||
action: 'changeConfig',
|
||||
target: configChanges,
|
||||
humanReadableTarget: humanReadableConfigChanges,
|
||||
rebootRequired: reboot,
|
||||
});
|
||||
}
|
||||
|
||||
return steps;
|
||||
}
|
||||
|
||||
async function getVPNSteps(
|
||||
current: Dictionary<string>,
|
||||
target: Dictionary<string>,
|
||||
) {
|
||||
const { unmanaged } = await config.getMany(['unmanaged']);
|
||||
|
||||
let steps: ConfigStep[] = [];
|
||||
|
||||
// Check for special case actions for the VPN
|
||||
if (
|
||||
!unmanaged &&
|
||||
@ -481,6 +490,12 @@ export async function getRequiredSteps(
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: the only step that requires rate limiting is setVPNEnabled
|
||||
// do not use rate limiting in the future as it probably will change.
|
||||
// The reason rate limiting is needed for this step is because the dbus
|
||||
// API does not wait for the service response when a unit is started/stopped.
|
||||
// This would cause too many requests on systemd and a possible error.
|
||||
// Promisifying the dbus api to wait for the response would be the right solution
|
||||
const now = Date.now();
|
||||
steps = _.map(steps, (step) => {
|
||||
const action = step.action;
|
||||
@ -502,7 +517,17 @@ export async function getRequiredSteps(
|
||||
return step;
|
||||
});
|
||||
|
||||
return steps;
|
||||
}
|
||||
|
||||
async function getBackendSteps(
|
||||
current: Dictionary<string>,
|
||||
target: Dictionary<string>,
|
||||
) {
|
||||
const steps: ConfigStep[] = [];
|
||||
const backends = await getConfigBackends();
|
||||
const { deviceType } = await config.getMany(['deviceType']);
|
||||
|
||||
// Check for required bootConfig changes
|
||||
for (const backend of backends) {
|
||||
if (changeRequired(backend, current, target, deviceType)) {
|
||||
@ -513,12 +538,64 @@ export async function getRequiredSteps(
|
||||
}
|
||||
}
|
||||
|
||||
return [
|
||||
// All backend steps require a reboot
|
||||
...(steps.length > 0
|
||||
? [{ action: 'setRebootBreadcrumb' } as ConfigStep]
|
||||
: []),
|
||||
...steps,
|
||||
];
|
||||
}
|
||||
|
||||
async function isRebootRequired() {
|
||||
const hasBreadcrumb = await fsUtils.exists(REBOOT_BREADCRUMB);
|
||||
if (hasBreadcrumb) {
|
||||
const stats = await fs.stat(REBOOT_BREADCRUMB);
|
||||
|
||||
// If the breadcrumb exists and the last modified time is greater than the
|
||||
// boot time, that means we need to reboot
|
||||
return stats.mtime.getTime() > fsUtils.getBootTime().getTime();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function getRequiredSteps(
|
||||
currentState: DeviceStatus,
|
||||
targetState: { local?: { config?: Dictionary<string> } },
|
||||
): Promise<ConfigStep[]> {
|
||||
const current: Dictionary<string> = _.get(
|
||||
currentState,
|
||||
['local', 'config'],
|
||||
{},
|
||||
);
|
||||
const target: Dictionary<string> = _.get(
|
||||
targetState,
|
||||
['local', 'config'],
|
||||
{},
|
||||
);
|
||||
|
||||
const configSteps = getConfigSteps(current, target);
|
||||
const steps = [
|
||||
...configSteps,
|
||||
...(await getVPNSteps(current, target)),
|
||||
|
||||
// Only apply backend steps if no more config changes are left since
|
||||
// changing config.json may restart the supervisor
|
||||
...(configSteps.length > 0 &&
|
||||
// if any config step is a not 'noop' step, skip the backend steps
|
||||
configSteps.filter((s) => s.action !== 'noop').length > 0
|
||||
? // Set a 'noop' action so the apply function knows to retry
|
||||
[{ action: 'noop' } as ConfigStep]
|
||||
: await getBackendSteps(current, target)),
|
||||
];
|
||||
|
||||
// Check if there is either no steps, or they are all
|
||||
// noops, and we need to reboot. We want to do this
|
||||
// because in a preloaded setting with no internet
|
||||
// connection, the device will try to start containers
|
||||
// before any boot config has been applied, which can
|
||||
// cause problems
|
||||
const rebootRequired = await isRebootRequired();
|
||||
if (_.every(steps, { action: 'noop' }) && rebootRequired) {
|
||||
steps.push({
|
||||
action: 'reboot',
|
||||
@ -614,7 +691,6 @@ export async function setBootConfig(
|
||||
{},
|
||||
'Apply boot config success',
|
||||
);
|
||||
rebootRequired = true;
|
||||
return true;
|
||||
} catch (err) {
|
||||
logger.logSystemMessage(
|
||||
|
@ -3,6 +3,7 @@ import { promises as fs } from 'fs';
|
||||
import * as path from 'path';
|
||||
import { exec as execSync } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { uptime } from 'os';
|
||||
|
||||
import * as constants from './constants';
|
||||
|
||||
@ -79,6 +80,35 @@ export async function unlinkAll(...paths: string[]): Promise<void> {
|
||||
/**
|
||||
* Get one or more paths as they exist in relation to host OS's root.
|
||||
*/
|
||||
export function getPathOnHost(...paths: string[]): string[] {
|
||||
return paths.map((p: string) => path.join(constants.rootMountPoint, p));
|
||||
export function getPathOnHost(path: string): string;
|
||||
export function getPathOnHost(...paths: string[]): string[];
|
||||
export function getPathOnHost(...paths: string[]): string[] | string {
|
||||
if (paths.length === 1) {
|
||||
return path.join(constants.rootMountPoint, paths[0]);
|
||||
} else {
|
||||
return paths.map((p: string) => path.join(constants.rootMountPoint, p));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Change modification and access time of the given file.
|
||||
* It creates an empty file if it does not exist
|
||||
*/
|
||||
export const touch = (file: string, time = new Date()) =>
|
||||
// set both access time and modified time to the value passed
|
||||
// as argument (default to `now`)
|
||||
fs.utimes(file, time, time).catch((e) =>
|
||||
// only create the file if it doesn't exist,
|
||||
// if some other error happens is probably better to not touch it
|
||||
e.code === 'ENOENT'
|
||||
? fs
|
||||
.open(file, 'w')
|
||||
.then((fd) => fd.close())
|
||||
// If date is custom we need to change the file atime and mtime
|
||||
.then(() => fs.utimes(file, time, time))
|
||||
: e,
|
||||
);
|
||||
|
||||
// Get the system boot time as a Date object
|
||||
export const getBootTime = () =>
|
||||
new Date(new Date().getTime() - uptime() * 1000);
|
||||
|
@ -145,8 +145,7 @@ export function lock<T extends unknown>(
|
||||
.then((lockOverride) => {
|
||||
return writeLock(appId)
|
||||
.tap((release: () => void) => {
|
||||
const [lockDir] = getPathOnHost(lockPath(appId));
|
||||
|
||||
const lockDir = getPathOnHost(lockPath(appId));
|
||||
return Bluebird.resolve(fs.readdir(lockDir))
|
||||
.catchReturn(ENOENT, [])
|
||||
.mapSeries((serviceName) => {
|
||||
|
@ -59,7 +59,7 @@ describe('Config', () => {
|
||||
it('allows deleting a config.json key and returns a default value if none is set', async () => {
|
||||
await conf.remove('appUpdatePollInterval');
|
||||
const poll = await conf.get('appUpdatePollInterval');
|
||||
return expect(poll).to.equal(60000);
|
||||
return expect(poll).to.equal(900000);
|
||||
});
|
||||
|
||||
it('allows deleting a config.json key if it is null', async () => {
|
||||
|
@ -1,5 +1,6 @@
|
||||
import { stripIndent } from 'common-tags';
|
||||
import { promises as fs } from 'fs';
|
||||
import * as path from 'path';
|
||||
import { SinonStub, stub, spy, SinonSpy } from 'sinon';
|
||||
import { expect } from 'chai';
|
||||
|
||||
@ -15,6 +16,7 @@ import * as constants from '../src/lib/constants';
|
||||
import * as config from '../src/config';
|
||||
|
||||
import prepare = require('./lib/prepare');
|
||||
import mock = require('mock-fs');
|
||||
|
||||
const extlinuxBackend = new Extlinux();
|
||||
const configTxtBackend = new ConfigTxt();
|
||||
@ -22,6 +24,9 @@ const odmdataBackend = new Odmdata();
|
||||
const configFsBackend = new ConfigFs();
|
||||
const splashImageBackend = new SplashImage();
|
||||
|
||||
// TODO: Since the getBootConfig method is simple enough
|
||||
// these tests could probably be removed if each backend has its own
|
||||
// test and the src/config/utils module is properly tested.
|
||||
describe('Device Backend Config', () => {
|
||||
let logSpy: SinonSpy;
|
||||
|
||||
@ -250,7 +255,7 @@ describe('Device Backend Config', () => {
|
||||
HOST_FIREWALL_MODE: 'off',
|
||||
HOST_DISCOVERABILITY: 'true',
|
||||
SUPERVISOR_VPN_CONTROL: 'true',
|
||||
SUPERVISOR_POLL_INTERVAL: '60000',
|
||||
SUPERVISOR_POLL_INTERVAL: '900000',
|
||||
SUPERVISOR_LOCAL_MODE: 'false',
|
||||
SUPERVISOR_CONNECTIVITY_CHECK: 'true',
|
||||
SUPERVISOR_LOG_CONTROL: 'true',
|
||||
@ -566,7 +571,6 @@ describe('Device Backend Config', () => {
|
||||
'fincm3',
|
||||
),
|
||||
).to.equal(true);
|
||||
|
||||
await deviceConfig.setBootConfig(splashImageBackend, target);
|
||||
|
||||
expect(fsUtils.exec).to.be.calledOnce;
|
||||
@ -675,3 +679,136 @@ describe('Device Backend Config', () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('getRequiredSteps', () => {
|
||||
const bootMountPoint = path.join(
|
||||
constants.rootMountPoint,
|
||||
constants.bootMountPoint,
|
||||
);
|
||||
const configJson = 'test/data/config.json';
|
||||
const configTxt = path.join(bootMountPoint, 'config.txt');
|
||||
const deviceTypeJson = path.join(bootMountPoint, 'device-type.json');
|
||||
const osRelease = path.join(constants.rootMountPoint, '/etc/os-release');
|
||||
const splash = path.join(bootMountPoint, 'splash/balena-logo.png');
|
||||
|
||||
// TODO: something like this could be done as a fixture instead of
|
||||
// doing the file initialisation on 00-init.ts
|
||||
const mockFs = () => {
|
||||
mock({
|
||||
// This is only needed so config.get doesn't fail
|
||||
[configJson]: JSON.stringify({}),
|
||||
[configTxt]: stripIndent`
|
||||
enable_uart=true
|
||||
`,
|
||||
[osRelease]: stripIndent`
|
||||
PRETTY_NAME="balenaOS 2.88.5+rev1"
|
||||
META_BALENA_VERSION="2.88.5"
|
||||
VARIANT_ID="dev"
|
||||
`,
|
||||
[deviceTypeJson]: JSON.stringify({
|
||||
slug: 'raspberrypi4-64',
|
||||
arch: 'aarch64',
|
||||
}),
|
||||
[splash]: Buffer.from(
|
||||
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=',
|
||||
'base64',
|
||||
),
|
||||
});
|
||||
};
|
||||
|
||||
const unmockFs = () => {
|
||||
mock.restore();
|
||||
};
|
||||
|
||||
before(() => {
|
||||
mockFs();
|
||||
|
||||
// TODO: remove this once the remount on backend.ts is no longer
|
||||
// necessary
|
||||
stub(fsUtils, 'exec');
|
||||
});
|
||||
|
||||
after(() => {
|
||||
unmockFs();
|
||||
(fsUtils.exec as SinonStub).restore();
|
||||
});
|
||||
|
||||
it('returns required steps to config.json first if any', async () => {
|
||||
const steps = await deviceConfig.getRequiredSteps(
|
||||
{
|
||||
local: {
|
||||
config: {
|
||||
SUPERVISOR_POLL_INTERVAL: 900000,
|
||||
HOST_CONFIG_enable_uart: true,
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
{
|
||||
local: {
|
||||
config: {
|
||||
SUPERVISOR_POLL_INTERVAL: 600000,
|
||||
HOST_CONFIG_enable_uart: false,
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
);
|
||||
expect(steps.map((s) => s.action)).to.have.members([
|
||||
// No reboot is required by this config change
|
||||
'changeConfig',
|
||||
'noop', // The noop has to be here since there are also changes from config backends
|
||||
]);
|
||||
});
|
||||
|
||||
it('sets the rebooot breadcrumb for config steps that require a reboot', async () => {
|
||||
const steps = await deviceConfig.getRequiredSteps(
|
||||
{
|
||||
local: {
|
||||
config: {
|
||||
SUPERVISOR_POLL_INTERVAL: 900000,
|
||||
SUPERVISOR_PERSISTENT_LOGGING: false,
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
{
|
||||
local: {
|
||||
config: {
|
||||
SUPERVISOR_POLL_INTERVAL: 600000,
|
||||
SUPERVISOR_PERSISTENT_LOGGING: true,
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
);
|
||||
expect(steps.map((s) => s.action)).to.have.members([
|
||||
'setRebootBreadcrumb',
|
||||
'changeConfig',
|
||||
'noop',
|
||||
]);
|
||||
});
|
||||
|
||||
it('returns required steps for backends if no steps are required for config.json', async () => {
|
||||
const steps = await deviceConfig.getRequiredSteps(
|
||||
{
|
||||
local: {
|
||||
config: {
|
||||
SUPERVISOR_POLL_INTERVAL: 900000,
|
||||
SUPERVISOR_PERSISTENT_LOGGING: true,
|
||||
HOST_CONFIG_enable_uart: true,
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
{
|
||||
local: {
|
||||
config: {
|
||||
SUPERVISOR_POLL_INTERVAL: 900000,
|
||||
SUPERVISOR_PERSISTENT_LOGGING: true,
|
||||
HOST_CONFIG_enable_uart: false,
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
);
|
||||
expect(steps.map((s) => s.action)).to.have.members([
|
||||
'setRebootBreadcrumb',
|
||||
'setBootConfig',
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
@ -15,8 +15,14 @@ describe('lib/fs-utils', () => {
|
||||
|
||||
const mockFs = () => {
|
||||
mock({
|
||||
[testFile1]: 'foo',
|
||||
[testFile2]: 'bar',
|
||||
[testFile1]: mock.file({
|
||||
content: 'foo',
|
||||
mtime: new Date('2022-01-04T00:00:00'),
|
||||
}),
|
||||
[testFile2]: mock.file({
|
||||
content: 'bar',
|
||||
mtime: new Date('2022-01-04T00:00:00'),
|
||||
}),
|
||||
});
|
||||
};
|
||||
|
||||
@ -148,10 +154,47 @@ describe('lib/fs-utils', () => {
|
||||
after(unmockFs);
|
||||
|
||||
it("should return the paths of one or more files as they exist on host OS's root", async () => {
|
||||
expect(fsUtils.getPathOnHost(testFileName1)).to.deep.equal([testFile1]);
|
||||
expect(fsUtils.getPathOnHost(testFileName1)).to.deep.equal(testFile1);
|
||||
expect(
|
||||
fsUtils.getPathOnHost(...[testFileName1, testFileName2]),
|
||||
fsUtils.getPathOnHost(testFileName1, testFileName2),
|
||||
).to.deep.equal([testFile1, testFile2]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('touch', () => {
|
||||
beforeEach(mockFs);
|
||||
afterEach(unmockFs);
|
||||
|
||||
it('creates the file if it does not exist', async () => {
|
||||
await fsUtils.touch('somefile');
|
||||
expect(await fsUtils.exists('somefile')).to.be.true;
|
||||
});
|
||||
|
||||
it('updates the file mtime if file already exists', async () => {
|
||||
const statsBefore = await fs.stat(testFile1);
|
||||
await fsUtils.touch(testFile1);
|
||||
const statsAfter = await fs.stat(testFile1);
|
||||
|
||||
// Mtime should be different
|
||||
expect(statsAfter.mtime.getTime()).to.not.equal(
|
||||
statsBefore.mtime.getTime(),
|
||||
);
|
||||
});
|
||||
|
||||
it('allows setting a custom time for existing files', async () => {
|
||||
const customTime = new Date('1981-11-24T12:00:00');
|
||||
await fsUtils.touch(testFile1, customTime);
|
||||
const statsAfter = await fs.stat(testFile1);
|
||||
|
||||
expect(statsAfter.mtime.getTime()).to.be.equal(customTime.getTime());
|
||||
});
|
||||
|
||||
it('allows setting a custom time for newly created files', async () => {
|
||||
const customTime = new Date('1981-11-24T12:00:00');
|
||||
await fsUtils.touch('somefile', customTime);
|
||||
const statsAfter = await fs.stat('somefile');
|
||||
|
||||
expect(statsAfter.mtime.getTime()).to.be.equal(customTime.getTime());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user