Merge pull request #1883 from balena-os/reduce-config-json-writes

Make the supervisor more resistant to restarts during config changes
This commit is contained in:
bulldozer-balena[bot] 2022-02-16 13:28:30 +00:00 committed by GitHub
commit 1c82097d1d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 327 additions and 42 deletions

View File

@ -60,7 +60,7 @@ export const schemaTypes = {
},
appUpdatePollInterval: {
type: PermissiveNumber,
default: 60000,
default: 900000,
},
instantUpdates: {
type: PermissiveBoolean,

View File

@ -1,5 +1,6 @@
import * as _ from 'lodash';
import { inspect } from 'util';
import { promises as fs } from 'fs';
import * as config from './config';
import * as db from './db';
@ -15,9 +16,19 @@ import { SchemaTypeKey } from './config/schema-type';
import { matchesAnyBootConfig } from './config/backends';
import { ConfigBackend } from './config/backends/backend';
import { Odmdata } from './config/backends/odmdata';
import * as fsUtils from './lib/fs-utils';
const vpnServiceName = 'openvpn';
// This indicates the file on the host /tmp directory that
// marks the need for a reboot. Since reboot is only triggered for now
// by some config changes, we leave this here for now. There is planned
// functionality to allow image installs to require reboots, at that moment
// this constant can be moved somewhere else
const REBOOT_BREADCRUMB = fsUtils.getPathOnHost(
'/tmp/balena-supervisor/reboot-after-apply',
);
interface ConfigOption {
envVarName: string;
varType: string;
@ -34,7 +45,6 @@ export interface ConfigStep {
action: keyof DeviceActionExecutors | 'reboot' | 'noop';
humanReadableTarget?: Dictionary<string>;
target?: string | Dictionary<string>;
rebootRequired?: boolean;
}
interface DeviceActionExecutorOpts {
@ -50,9 +60,9 @@ interface DeviceActionExecutors {
changeConfig: DeviceActionExecutorFn;
setVPNEnabled: DeviceActionExecutorFn;
setBootConfig: DeviceActionExecutorFn;
setRebootBreadcrumb: DeviceActionExecutorFn;
}
let rebootRequired = false;
const actionExecutors: DeviceActionExecutors = {
changeConfig: async (step) => {
try {
@ -70,9 +80,6 @@ const actionExecutors: DeviceActionExecutors = {
success: true,
});
}
if (step.rebootRequired) {
rebootRequired = true;
}
} catch (err) {
if (step.humanReadableTarget) {
logger.logConfigChange(step.humanReadableTarget, {
@ -110,6 +117,11 @@ const actionExecutors: DeviceActionExecutors = {
await setBootConfig(backend, step.target as Dictionary<string>);
}
},
setRebootBreadcrumb: async () => {
// Just create the file. The last step in the target state calculation will check
// the file and create a reboot step
await fsUtils.touch(REBOOT_BREADCRUMB);
},
};
const configBackends: ConfigBackend[] = [];
@ -118,7 +130,7 @@ const configKeys: Dictionary<ConfigOption> = {
appUpdatePollInterval: {
envVarName: 'SUPERVISOR_POLL_INTERVAL',
varType: 'int',
defaultValue: '60000',
defaultValue: '900000',
},
instantUpdates: {
envVarName: 'SUPERVISOR_INSTANT_UPDATE_TRIGGER',
@ -396,31 +408,14 @@ export function bootConfigChangeRequired(
return false;
}
export async function getRequiredSteps(
currentState: DeviceStatus,
targetState: { local?: { config?: Dictionary<string> } },
): Promise<ConfigStep[]> {
const current: Dictionary<string> = _.get(
currentState,
['local', 'config'],
{},
);
const target: Dictionary<string> = _.get(
targetState,
['local', 'config'],
{},
);
let steps: ConfigStep[] = [];
const { deviceType, unmanaged } = await config.getMany([
'deviceType',
'unmanaged',
]);
function getConfigSteps(
current: Dictionary<string>,
target: Dictionary<string>,
) {
const configChanges: Dictionary<string> = {};
const humanReadableConfigChanges: Dictionary<string> = {};
let reboot = false;
const steps: ConfigStep[] = [];
_.each(
configKeys,
@ -461,14 +456,28 @@ export async function getRequiredSteps(
);
if (!_.isEmpty(configChanges)) {
if (reboot) {
steps.push({ action: 'setRebootBreadcrumb' });
}
steps.push({
action: 'changeConfig',
target: configChanges,
humanReadableTarget: humanReadableConfigChanges,
rebootRequired: reboot,
});
}
return steps;
}
async function getVPNSteps(
current: Dictionary<string>,
target: Dictionary<string>,
) {
const { unmanaged } = await config.getMany(['unmanaged']);
let steps: ConfigStep[] = [];
// Check for special case actions for the VPN
if (
!unmanaged &&
@ -481,6 +490,12 @@ export async function getRequiredSteps(
});
}
// TODO: the only step that requires rate limiting is setVPNEnabled
// do not use rate limiting in the future as it probably will change.
// The reason rate limiting is needed for this step is because the dbus
// API does not wait for the service response when a unit is started/stopped.
// This would cause too many requests on systemd and a possible error.
// Promisifying the dbus api to wait for the response would be the right solution
const now = Date.now();
steps = _.map(steps, (step) => {
const action = step.action;
@ -502,7 +517,17 @@ export async function getRequiredSteps(
return step;
});
return steps;
}
async function getBackendSteps(
current: Dictionary<string>,
target: Dictionary<string>,
) {
const steps: ConfigStep[] = [];
const backends = await getConfigBackends();
const { deviceType } = await config.getMany(['deviceType']);
// Check for required bootConfig changes
for (const backend of backends) {
if (changeRequired(backend, current, target, deviceType)) {
@ -513,12 +538,64 @@ export async function getRequiredSteps(
}
}
return [
// All backend steps require a reboot
...(steps.length > 0
? [{ action: 'setRebootBreadcrumb' } as ConfigStep]
: []),
...steps,
];
}
async function isRebootRequired() {
const hasBreadcrumb = await fsUtils.exists(REBOOT_BREADCRUMB);
if (hasBreadcrumb) {
const stats = await fs.stat(REBOOT_BREADCRUMB);
// If the breadcrumb exists and the last modified time is greater than the
// boot time, that means we need to reboot
return stats.mtime.getTime() > fsUtils.getBootTime().getTime();
}
return false;
}
export async function getRequiredSteps(
currentState: DeviceStatus,
targetState: { local?: { config?: Dictionary<string> } },
): Promise<ConfigStep[]> {
const current: Dictionary<string> = _.get(
currentState,
['local', 'config'],
{},
);
const target: Dictionary<string> = _.get(
targetState,
['local', 'config'],
{},
);
const configSteps = getConfigSteps(current, target);
const steps = [
...configSteps,
...(await getVPNSteps(current, target)),
// Only apply backend steps if no more config changes are left since
// changing config.json may restart the supervisor
...(configSteps.length > 0 &&
// if any config step is a not 'noop' step, skip the backend steps
configSteps.filter((s) => s.action !== 'noop').length > 0
? // Set a 'noop' action so the apply function knows to retry
[{ action: 'noop' } as ConfigStep]
: await getBackendSteps(current, target)),
];
// Check if there is either no steps, or they are all
// noops, and we need to reboot. We want to do this
// because in a preloaded setting with no internet
// connection, the device will try to start containers
// before any boot config has been applied, which can
// cause problems
const rebootRequired = await isRebootRequired();
if (_.every(steps, { action: 'noop' }) && rebootRequired) {
steps.push({
action: 'reboot',
@ -614,7 +691,6 @@ export async function setBootConfig(
{},
'Apply boot config success',
);
rebootRequired = true;
return true;
} catch (err) {
logger.logSystemMessage(

View File

@ -3,6 +3,7 @@ import { promises as fs } from 'fs';
import * as path from 'path';
import { exec as execSync } from 'child_process';
import { promisify } from 'util';
import { uptime } from 'os';
import * as constants from './constants';
@ -79,6 +80,35 @@ export async function unlinkAll(...paths: string[]): Promise<void> {
/**
* Get one or more paths as they exist in relation to host OS's root.
*/
export function getPathOnHost(...paths: string[]): string[] {
return paths.map((p: string) => path.join(constants.rootMountPoint, p));
export function getPathOnHost(path: string): string;
export function getPathOnHost(...paths: string[]): string[];
export function getPathOnHost(...paths: string[]): string[] | string {
if (paths.length === 1) {
return path.join(constants.rootMountPoint, paths[0]);
} else {
return paths.map((p: string) => path.join(constants.rootMountPoint, p));
}
}
/**
* Change modification and access time of the given file.
* It creates an empty file if it does not exist
*/
export const touch = (file: string, time = new Date()) =>
// set both access time and modified time to the value passed
// as argument (default to `now`)
fs.utimes(file, time, time).catch((e) =>
// only create the file if it doesn't exist,
// if some other error happens is probably better to not touch it
e.code === 'ENOENT'
? fs
.open(file, 'w')
.then((fd) => fd.close())
// If date is custom we need to change the file atime and mtime
.then(() => fs.utimes(file, time, time))
: e,
);
// Get the system boot time as a Date object
export const getBootTime = () =>
new Date(new Date().getTime() - uptime() * 1000);

View File

@ -145,8 +145,7 @@ export function lock<T extends unknown>(
.then((lockOverride) => {
return writeLock(appId)
.tap((release: () => void) => {
const [lockDir] = getPathOnHost(lockPath(appId));
const lockDir = getPathOnHost(lockPath(appId));
return Bluebird.resolve(fs.readdir(lockDir))
.catchReturn(ENOENT, [])
.mapSeries((serviceName) => {

View File

@ -59,7 +59,7 @@ describe('Config', () => {
it('allows deleting a config.json key and returns a default value if none is set', async () => {
await conf.remove('appUpdatePollInterval');
const poll = await conf.get('appUpdatePollInterval');
return expect(poll).to.equal(60000);
return expect(poll).to.equal(900000);
});
it('allows deleting a config.json key if it is null', async () => {

View File

@ -1,5 +1,6 @@
import { stripIndent } from 'common-tags';
import { promises as fs } from 'fs';
import * as path from 'path';
import { SinonStub, stub, spy, SinonSpy } from 'sinon';
import { expect } from 'chai';
@ -15,6 +16,7 @@ import * as constants from '../src/lib/constants';
import * as config from '../src/config';
import prepare = require('./lib/prepare');
import mock = require('mock-fs');
const extlinuxBackend = new Extlinux();
const configTxtBackend = new ConfigTxt();
@ -22,6 +24,9 @@ const odmdataBackend = new Odmdata();
const configFsBackend = new ConfigFs();
const splashImageBackend = new SplashImage();
// TODO: Since the getBootConfig method is simple enough
// these tests could probably be removed if each backend has its own
// test and the src/config/utils module is properly tested.
describe('Device Backend Config', () => {
let logSpy: SinonSpy;
@ -250,7 +255,7 @@ describe('Device Backend Config', () => {
HOST_FIREWALL_MODE: 'off',
HOST_DISCOVERABILITY: 'true',
SUPERVISOR_VPN_CONTROL: 'true',
SUPERVISOR_POLL_INTERVAL: '60000',
SUPERVISOR_POLL_INTERVAL: '900000',
SUPERVISOR_LOCAL_MODE: 'false',
SUPERVISOR_CONNECTIVITY_CHECK: 'true',
SUPERVISOR_LOG_CONTROL: 'true',
@ -566,7 +571,6 @@ describe('Device Backend Config', () => {
'fincm3',
),
).to.equal(true);
await deviceConfig.setBootConfig(splashImageBackend, target);
expect(fsUtils.exec).to.be.calledOnce;
@ -675,3 +679,136 @@ describe('Device Backend Config', () => {
});
});
});
describe('getRequiredSteps', () => {
const bootMountPoint = path.join(
constants.rootMountPoint,
constants.bootMountPoint,
);
const configJson = 'test/data/config.json';
const configTxt = path.join(bootMountPoint, 'config.txt');
const deviceTypeJson = path.join(bootMountPoint, 'device-type.json');
const osRelease = path.join(constants.rootMountPoint, '/etc/os-release');
const splash = path.join(bootMountPoint, 'splash/balena-logo.png');
// TODO: something like this could be done as a fixture instead of
// doing the file initialisation on 00-init.ts
const mockFs = () => {
mock({
// This is only needed so config.get doesn't fail
[configJson]: JSON.stringify({}),
[configTxt]: stripIndent`
enable_uart=true
`,
[osRelease]: stripIndent`
PRETTY_NAME="balenaOS 2.88.5+rev1"
META_BALENA_VERSION="2.88.5"
VARIANT_ID="dev"
`,
[deviceTypeJson]: JSON.stringify({
slug: 'raspberrypi4-64',
arch: 'aarch64',
}),
[splash]: Buffer.from(
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=',
'base64',
),
});
};
const unmockFs = () => {
mock.restore();
};
before(() => {
mockFs();
// TODO: remove this once the remount on backend.ts is no longer
// necessary
stub(fsUtils, 'exec');
});
after(() => {
unmockFs();
(fsUtils.exec as SinonStub).restore();
});
it('returns required steps to config.json first if any', async () => {
const steps = await deviceConfig.getRequiredSteps(
{
local: {
config: {
SUPERVISOR_POLL_INTERVAL: 900000,
HOST_CONFIG_enable_uart: true,
},
},
} as any,
{
local: {
config: {
SUPERVISOR_POLL_INTERVAL: 600000,
HOST_CONFIG_enable_uart: false,
},
},
} as any,
);
expect(steps.map((s) => s.action)).to.have.members([
// No reboot is required by this config change
'changeConfig',
'noop', // The noop has to be here since there are also changes from config backends
]);
});
it('sets the rebooot breadcrumb for config steps that require a reboot', async () => {
const steps = await deviceConfig.getRequiredSteps(
{
local: {
config: {
SUPERVISOR_POLL_INTERVAL: 900000,
SUPERVISOR_PERSISTENT_LOGGING: false,
},
},
} as any,
{
local: {
config: {
SUPERVISOR_POLL_INTERVAL: 600000,
SUPERVISOR_PERSISTENT_LOGGING: true,
},
},
} as any,
);
expect(steps.map((s) => s.action)).to.have.members([
'setRebootBreadcrumb',
'changeConfig',
'noop',
]);
});
it('returns required steps for backends if no steps are required for config.json', async () => {
const steps = await deviceConfig.getRequiredSteps(
{
local: {
config: {
SUPERVISOR_POLL_INTERVAL: 900000,
SUPERVISOR_PERSISTENT_LOGGING: true,
HOST_CONFIG_enable_uart: true,
},
},
} as any,
{
local: {
config: {
SUPERVISOR_POLL_INTERVAL: 900000,
SUPERVISOR_PERSISTENT_LOGGING: true,
HOST_CONFIG_enable_uart: false,
},
},
} as any,
);
expect(steps.map((s) => s.action)).to.have.members([
'setRebootBreadcrumb',
'setBootConfig',
]);
});
});

View File

@ -15,8 +15,14 @@ describe('lib/fs-utils', () => {
const mockFs = () => {
mock({
[testFile1]: 'foo',
[testFile2]: 'bar',
[testFile1]: mock.file({
content: 'foo',
mtime: new Date('2022-01-04T00:00:00'),
}),
[testFile2]: mock.file({
content: 'bar',
mtime: new Date('2022-01-04T00:00:00'),
}),
});
};
@ -148,10 +154,47 @@ describe('lib/fs-utils', () => {
after(unmockFs);
it("should return the paths of one or more files as they exist on host OS's root", async () => {
expect(fsUtils.getPathOnHost(testFileName1)).to.deep.equal([testFile1]);
expect(fsUtils.getPathOnHost(testFileName1)).to.deep.equal(testFile1);
expect(
fsUtils.getPathOnHost(...[testFileName1, testFileName2]),
fsUtils.getPathOnHost(testFileName1, testFileName2),
).to.deep.equal([testFile1, testFile2]);
});
});
describe('touch', () => {
beforeEach(mockFs);
afterEach(unmockFs);
it('creates the file if it does not exist', async () => {
await fsUtils.touch('somefile');
expect(await fsUtils.exists('somefile')).to.be.true;
});
it('updates the file mtime if file already exists', async () => {
const statsBefore = await fs.stat(testFile1);
await fsUtils.touch(testFile1);
const statsAfter = await fs.stat(testFile1);
// Mtime should be different
expect(statsAfter.mtime.getTime()).to.not.equal(
statsBefore.mtime.getTime(),
);
});
it('allows setting a custom time for existing files', async () => {
const customTime = new Date('1981-11-24T12:00:00');
await fsUtils.touch(testFile1, customTime);
const statsAfter = await fs.stat(testFile1);
expect(statsAfter.mtime.getTime()).to.be.equal(customTime.getTime());
});
it('allows setting a custom time for newly created files', async () => {
const customTime = new Date('1981-11-24T12:00:00');
await fsUtils.touch('somefile', customTime);
const statsAfter = await fs.stat('somefile');
expect(statsAfter.mtime.getTime()).to.be.equal(customTime.getTime());
});
});
});