Add toggleable SUPERVISOR_HARDWARE_METRICS config

On devices with bandwidth sensitivity, this config var
disables sending system information such as memory
usage or cpu temp as current state.

Closes: #1645
Change-type: minor
Signed-off-by: Christina Wang <christina@balena.io>
This commit is contained in:
Christina Wang 2021-05-05 23:16:58 +09:00
parent ea3e50e96e
commit dcd863eed8
No known key found for this signature in database
GPG Key ID: 7C3ED0230F440835
9 changed files with 202 additions and 18 deletions

View File

@ -166,6 +166,10 @@ export const schemaTypes = {
type: PermissiveBoolean,
default: true,
},
hardwareMetrics: {
type: PermissiveBoolean,
default: true,
},
// Function schema types
// The type should be the value that the promise resolves

View File

@ -180,6 +180,11 @@ export const schema = {
mutable: true,
removeIfNull: false,
},
hardwareMetrics: {
source: 'db',
mutable: true,
removeIfNull: false,
},
};
export type Schema = typeof schema;

View File

@ -191,6 +191,11 @@ const configKeys: Dictionary<ConfigOption> = {
varType: 'bool',
defaultValue: 'true',
},
hardwareMetrics: {
envVarName: 'SUPERVISOR_HARDWARE_METRICS',
varType: 'bool',
defaultValue: 'true',
},
};
export const validKeys = [

View File

@ -44,6 +44,7 @@ type CurrentStateReportConf = {
| 'deviceApiKey'
| 'deviceId'
| 'localMode'
| 'hardwareMetrics'
>]: SchemaReturn<key>;
};
@ -61,7 +62,7 @@ const stripDeviceStateInLocalMode = (state: DeviceStatus): DeviceStatus => {
const sendReportPatch = async (
stateDiff: DeviceStatus,
conf: Omit<CurrentStateReportConf, 'deviceId'>,
conf: Omit<CurrentStateReportConf, 'deviceId' | 'hardwareMetrics'>,
) => {
let body = stateDiff;
const { apiEndpoint, apiTimeout, deviceApiKey, localMode, uuid } = conf;
@ -175,7 +176,23 @@ const reportCurrentState = (): null => {
reportPending = true;
try {
const currentDeviceState = await deviceState.getStatus();
const info = await sysInfo.getSysInfoToReport();
// If hardwareMetrics is false, send null patch for system metrics to cloud API
const info = {
...((await config.get('hardwareMetrics'))
? await sysInfo.getSystemMetrics()
: {
cpu_usage: null,
memory_usage: null,
memory_total: null,
storage_usage: null,
storage_total: null,
storage_block_device: null,
cpu_temp: null,
cpu_id: null,
}),
...(await sysInfo.getSystemChecks()),
};
stateForReport.local = {
...stateForReport.local,
...currentDeviceState.local,

View File

@ -87,14 +87,31 @@ export async function undervoltageDetected(): Promise<boolean> {
}
}
export async function getSysInfoToReport() {
const [cpu, mem, temp, cpuid, storage, undervoltage] = await Promise.all([
/**
* System metrics that are always reported in current state
* due to their importance, regardless of HARDWARE_METRICS
*/
export async function getSystemChecks() {
// TODO: feature - add more eager diagnostic style metrics here,
// such as fs corruption checks, network issues, etc.
const undervoltage = await undervoltageDetected();
return {
is_undervolted: undervoltage,
};
}
/**
* Metrics that would be reported in current state only
* when HARDWARE_METRICS config var is true.
*/
export async function getSystemMetrics() {
const [cpu, mem, temp, cpuid, storage] = await Promise.all([
getCpuUsage(),
getMemoryInformation(),
getCpuTemp(),
getCpuId(),
getStorageInfo(),
undervoltageDetected(),
]);
return {
@ -106,13 +123,14 @@ export async function getSysInfoToReport() {
storage_block_device: storage.blockDevice,
cpu_temp: temp,
cpu_id: cpuid,
is_undervolted: undervoltage,
};
}
export type SystemInfo = UnwrappedPromise<
ReturnType<typeof getSysInfoToReport>
>;
type SystemChecks = UnwrappedPromise<ReturnType<typeof getSystemChecks>>;
type SystemMetrics = UnwrappedPromise<ReturnType<typeof getSystemMetrics>>;
export type SystemInfo = SystemChecks & SystemMetrics;
const significantChange: { [key in keyof SystemInfo]?: number } = {
cpu_usage: 20,
@ -134,6 +152,11 @@ export function isSignificantChange(
if (bucketSize == null) {
return true;
}
// If the `current` parameter is null, HARDWARE_METRICS has been toggled false
// and we should include this value for the null metrics patch to the API
if (current === null) {
return true;
}
return Math.floor(current / bucketSize) !== Math.floor(past / bucketSize);
}

View File

@ -263,6 +263,7 @@ describe('Device Backend Config', () => {
SUPERVISOR_INSTANT_UPDATE_TRIGGER: 'true',
SUPERVISOR_OVERRIDE_LOCK: 'false',
SUPERVISOR_PERSISTENT_LOGGING: 'false',
SUPERVISOR_HARDWARE_METRICS: 'true',
});
});

View File

@ -1,3 +1,6 @@
/**
* Delays for input ms before resolving
*/
export async function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}

View File

@ -4,9 +4,12 @@ import rewire = require('rewire');
import * as Bluebird from 'bluebird';
import * as _ from 'lodash';
import { sleep } from '../../lib/helpers';
import * as config from '../../../src/config';
import * as deviceState from '../../../src/device-state';
import { stateReportErrors } from '../../../src/device-state/current-state';
import * as eventTracker from '../../../src/event-tracker';
import * as request from '../../../src/lib/request';
import log from '../../../src/lib/supervisor-console';
@ -42,7 +45,7 @@ describe('device-state/current-state', () => {
patchAsync: () => Bluebird.resolve([{ statusCode: 200 }]),
};
// Suite-level hooks
// Parent-level hooks to prevent any requests from actually happening in any suites below
before(() => {
stubbedGetReqInstance = stub(request, 'getRequestInstance');
stubbedGetReqInstance.resolves(requestInstance);
@ -69,7 +72,7 @@ describe('device-state/current-state', () => {
deviceApiKey: 'testapikey',
deviceId: 1337,
localMode: false,
disableHardwareMetrics: true,
hardwareMetrics: true,
};
const testDeviceReportFields = {
@ -392,7 +395,7 @@ describe('device-state/current-state', () => {
beforeEach(() => {
configGetManyStub.resolves(
_.omit(testDeviceConf, ['deviceId', 'disableHardwareMetrics']) as any,
_.omit(testDeviceConf, ['deviceId', 'hardwareMetrics']) as any,
);
});
@ -411,7 +414,7 @@ describe('device-state/current-state', () => {
configGetManyStub.resolves(
_.omit(testDeviceConf, [
'deviceId',
'disableHardwareMetrics',
'hardwareMetrics',
'uuid',
'apiEndpoint',
]) as any,
@ -455,19 +458,128 @@ describe('device-state/current-state', () => {
});
});
describe.skip('reportCurrentState', () => {
const reportPending = currentState.__get__('reportPending');
describe('reportCurrentState', () => {
const reportCurrentState = currentState.__get__('reportCurrentState');
const report = currentState.__get__('report');
let testHardwareMetrics = true;
const testAppUpdatePollInterval = 1000;
const unhandledRejectionHandler = () => {
/* noop */
};
before(() => {
stub(deviceState, 'getStatus').resolves(testCurrentState as any);
stub(config, 'get').resolves(true);
stub(deviceState, 'getStatus').resolves({});
stub(sysInfo, 'getSystemMetrics').resolves();
stub(sysInfo, 'getSystemChecks').resolves();
stub(eventTracker, 'track');
stub(config, 'get');
(config.get as SinonStub).callsFake((conf) =>
conf === 'hardwareMetrics'
? Promise.resolve(testHardwareMetrics)
: Promise.resolve(testAppUpdatePollInterval),
);
// We need to stub this rejection so that reportCurrentState doesn't keep calling itself
stub(config, 'getMany').rejects();
// We also need to stub this rejection because it's called right before
// reportCurrentState in the catch, to prevent more calls of reportCurrentState
stub(Bluebird, 'delay').rejects();
});
after(() => {
(deviceState.getStatus as SinonStub).restore();
(sysInfo.getSystemMetrics as SinonStub).restore();
(sysInfo.getSystemChecks as SinonStub).restore();
(eventTracker.track as SinonStub).restore();
(config.get as SinonStub).restore();
(config.getMany as SinonStub).restore();
(Bluebird.delay as SinonStub).restore();
});
it('does not report if current state has not changed');
beforeEach(() => {
resetGlobalStateObjects();
});
afterEach(() => {
// Clear the throttle time limit between tests
report.cancel();
});
it('does not report if current state has not changed', async () => {
// Use a temporary unhandledRejectionHandler to catch the promise
// rejection from the Bluebird.delay stub
process.on('unhandledRejection', unhandledRejectionHandler);
spy(_, 'size');
reportCurrentState();
// Wait 200ms for anonymous async IIFE inside reportCurrentState to finish executing
// TODO: is there a better way to test this? Possible race condition
await sleep(200);
expect(stateForReport).to.deep.equal({ local: {}, dependent: {} });
expect(_.size as SinonSpy).to.have.returned(0);
(_.size as SinonSpy).restore();
process.removeListener('unhandledRejection', unhandledRejectionHandler);
});
it('sends a null patch for system metrics when HARDWARE_METRICS is false', async () => {
// Use a temporary unhandledRejectionHandler to catch the promise
// rejection from the Bluebird.delay stub
process.on('unhandledRejection', unhandledRejectionHandler);
testHardwareMetrics = false;
(sysInfo.getSystemMetrics as SinonStub).resolves({ cpu_usage: 20 });
(sysInfo.getSystemChecks as SinonStub).resolves({
is_undervolted: false,
});
reportCurrentState();
await sleep(200);
expect(stateForReport).to.deep.equal({
local: {
is_undervolted: false,
cpu_usage: null,
memory_usage: null,
memory_total: null,
storage_usage: null,
storage_total: null,
storage_block_device: null,
cpu_temp: null,
cpu_id: null,
},
dependent: {},
});
process.removeListener('unhandledRejection', unhandledRejectionHandler);
});
it('reports both system metrics and system checks when HARDWARE_METRICS is true', async () => {
// Use a temporary unhandledRejectionHandler to catch the promise
// rejection from the Bluebird.delay stub
process.on('unhandledRejection', unhandledRejectionHandler);
testHardwareMetrics = true;
(sysInfo.getSystemMetrics as SinonStub).resolves({ cpu_usage: 20 });
(sysInfo.getSystemChecks as SinonStub).resolves({
is_undervolted: false,
});
reportCurrentState();
await sleep(200);
expect(stateForReport).to.deep.equal({
local: { is_undervolted: false, cpu_usage: 20 },
dependent: {},
});
process.removeListener('unhandledRejection', unhandledRejectionHandler);
});
});
});

View File

@ -62,6 +62,20 @@ describe('System information', () => {
sysInfo.isSignificantChange('memory_usage', undefined, 5),
).to.equal(true);
});
it('should not filter if the current value is null', () => {
// When the current value is null, we're sending a null patch to the
// API in response to setting HARDWARE_METRICS to false, so
// we need to include null for all values. None of the individual metrics
// in systemMetrics return null (only number/undefined), so the only
// reason for current to be null is when a null patch is happening.
expect(sysInfo.isSignificantChange('cpu_usage', 15, null as any)).to.be
.true;
expect(sysInfo.isSignificantChange('cpu_temp', 55, null as any)).to.be
.true;
expect(sysInfo.isSignificantChange('memory_usage', 760, null as any)).to
.be.true;
});
});
describe('CPU information', () => {