mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2024-12-19 13:47:54 +00:00
Add toggleable SUPERVISOR_HARDWARE_METRICS
config
On devices with bandwidth sensitivity, this config var disables sending system information such as memory usage or cpu temp as current state. Closes: #1645 Change-type: minor Signed-off-by: Christina Wang <christina@balena.io>
This commit is contained in:
parent
ea3e50e96e
commit
dcd863eed8
@ -166,6 +166,10 @@ export const schemaTypes = {
|
||||
type: PermissiveBoolean,
|
||||
default: true,
|
||||
},
|
||||
hardwareMetrics: {
|
||||
type: PermissiveBoolean,
|
||||
default: true,
|
||||
},
|
||||
|
||||
// Function schema types
|
||||
// The type should be the value that the promise resolves
|
||||
|
@ -180,6 +180,11 @@ export const schema = {
|
||||
mutable: true,
|
||||
removeIfNull: false,
|
||||
},
|
||||
hardwareMetrics: {
|
||||
source: 'db',
|
||||
mutable: true,
|
||||
removeIfNull: false,
|
||||
},
|
||||
};
|
||||
|
||||
export type Schema = typeof schema;
|
||||
|
@ -191,6 +191,11 @@ const configKeys: Dictionary<ConfigOption> = {
|
||||
varType: 'bool',
|
||||
defaultValue: 'true',
|
||||
},
|
||||
hardwareMetrics: {
|
||||
envVarName: 'SUPERVISOR_HARDWARE_METRICS',
|
||||
varType: 'bool',
|
||||
defaultValue: 'true',
|
||||
},
|
||||
};
|
||||
|
||||
export const validKeys = [
|
||||
|
@ -44,6 +44,7 @@ type CurrentStateReportConf = {
|
||||
| 'deviceApiKey'
|
||||
| 'deviceId'
|
||||
| 'localMode'
|
||||
| 'hardwareMetrics'
|
||||
>]: SchemaReturn<key>;
|
||||
};
|
||||
|
||||
@ -61,7 +62,7 @@ const stripDeviceStateInLocalMode = (state: DeviceStatus): DeviceStatus => {
|
||||
|
||||
const sendReportPatch = async (
|
||||
stateDiff: DeviceStatus,
|
||||
conf: Omit<CurrentStateReportConf, 'deviceId'>,
|
||||
conf: Omit<CurrentStateReportConf, 'deviceId' | 'hardwareMetrics'>,
|
||||
) => {
|
||||
let body = stateDiff;
|
||||
const { apiEndpoint, apiTimeout, deviceApiKey, localMode, uuid } = conf;
|
||||
@ -175,7 +176,23 @@ const reportCurrentState = (): null => {
|
||||
reportPending = true;
|
||||
try {
|
||||
const currentDeviceState = await deviceState.getStatus();
|
||||
const info = await sysInfo.getSysInfoToReport();
|
||||
// If hardwareMetrics is false, send null patch for system metrics to cloud API
|
||||
const info = {
|
||||
...((await config.get('hardwareMetrics'))
|
||||
? await sysInfo.getSystemMetrics()
|
||||
: {
|
||||
cpu_usage: null,
|
||||
memory_usage: null,
|
||||
memory_total: null,
|
||||
storage_usage: null,
|
||||
storage_total: null,
|
||||
storage_block_device: null,
|
||||
cpu_temp: null,
|
||||
cpu_id: null,
|
||||
}),
|
||||
...(await sysInfo.getSystemChecks()),
|
||||
};
|
||||
|
||||
stateForReport.local = {
|
||||
...stateForReport.local,
|
||||
...currentDeviceState.local,
|
||||
|
@ -87,14 +87,31 @@ export async function undervoltageDetected(): Promise<boolean> {
|
||||
}
|
||||
}
|
||||
|
||||
export async function getSysInfoToReport() {
|
||||
const [cpu, mem, temp, cpuid, storage, undervoltage] = await Promise.all([
|
||||
/**
|
||||
* System metrics that are always reported in current state
|
||||
* due to their importance, regardless of HARDWARE_METRICS
|
||||
*/
|
||||
export async function getSystemChecks() {
|
||||
// TODO: feature - add more eager diagnostic style metrics here,
|
||||
// such as fs corruption checks, network issues, etc.
|
||||
const undervoltage = await undervoltageDetected();
|
||||
|
||||
return {
|
||||
is_undervolted: undervoltage,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Metrics that would be reported in current state only
|
||||
* when HARDWARE_METRICS config var is true.
|
||||
*/
|
||||
export async function getSystemMetrics() {
|
||||
const [cpu, mem, temp, cpuid, storage] = await Promise.all([
|
||||
getCpuUsage(),
|
||||
getMemoryInformation(),
|
||||
getCpuTemp(),
|
||||
getCpuId(),
|
||||
getStorageInfo(),
|
||||
undervoltageDetected(),
|
||||
]);
|
||||
|
||||
return {
|
||||
@ -106,13 +123,14 @@ export async function getSysInfoToReport() {
|
||||
storage_block_device: storage.blockDevice,
|
||||
cpu_temp: temp,
|
||||
cpu_id: cpuid,
|
||||
is_undervolted: undervoltage,
|
||||
};
|
||||
}
|
||||
|
||||
export type SystemInfo = UnwrappedPromise<
|
||||
ReturnType<typeof getSysInfoToReport>
|
||||
>;
|
||||
type SystemChecks = UnwrappedPromise<ReturnType<typeof getSystemChecks>>;
|
||||
|
||||
type SystemMetrics = UnwrappedPromise<ReturnType<typeof getSystemMetrics>>;
|
||||
|
||||
export type SystemInfo = SystemChecks & SystemMetrics;
|
||||
|
||||
const significantChange: { [key in keyof SystemInfo]?: number } = {
|
||||
cpu_usage: 20,
|
||||
@ -134,6 +152,11 @@ export function isSignificantChange(
|
||||
if (bucketSize == null) {
|
||||
return true;
|
||||
}
|
||||
// If the `current` parameter is null, HARDWARE_METRICS has been toggled false
|
||||
// and we should include this value for the null metrics patch to the API
|
||||
if (current === null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return Math.floor(current / bucketSize) !== Math.floor(past / bucketSize);
|
||||
}
|
||||
|
@ -263,6 +263,7 @@ describe('Device Backend Config', () => {
|
||||
SUPERVISOR_INSTANT_UPDATE_TRIGGER: 'true',
|
||||
SUPERVISOR_OVERRIDE_LOCK: 'false',
|
||||
SUPERVISOR_PERSISTENT_LOGGING: 'false',
|
||||
SUPERVISOR_HARDWARE_METRICS: 'true',
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
/**
|
||||
* Delays for input ms before resolving
|
||||
*/
|
||||
export async function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
@ -4,9 +4,12 @@ import rewire = require('rewire');
|
||||
import * as Bluebird from 'bluebird';
|
||||
import * as _ from 'lodash';
|
||||
|
||||
import { sleep } from '../../lib/helpers';
|
||||
|
||||
import * as config from '../../../src/config';
|
||||
import * as deviceState from '../../../src/device-state';
|
||||
import { stateReportErrors } from '../../../src/device-state/current-state';
|
||||
import * as eventTracker from '../../../src/event-tracker';
|
||||
|
||||
import * as request from '../../../src/lib/request';
|
||||
import log from '../../../src/lib/supervisor-console';
|
||||
@ -42,7 +45,7 @@ describe('device-state/current-state', () => {
|
||||
patchAsync: () => Bluebird.resolve([{ statusCode: 200 }]),
|
||||
};
|
||||
|
||||
// Suite-level hooks
|
||||
// Parent-level hooks to prevent any requests from actually happening in any suites below
|
||||
before(() => {
|
||||
stubbedGetReqInstance = stub(request, 'getRequestInstance');
|
||||
stubbedGetReqInstance.resolves(requestInstance);
|
||||
@ -69,7 +72,7 @@ describe('device-state/current-state', () => {
|
||||
deviceApiKey: 'testapikey',
|
||||
deviceId: 1337,
|
||||
localMode: false,
|
||||
disableHardwareMetrics: true,
|
||||
hardwareMetrics: true,
|
||||
};
|
||||
|
||||
const testDeviceReportFields = {
|
||||
@ -392,7 +395,7 @@ describe('device-state/current-state', () => {
|
||||
|
||||
beforeEach(() => {
|
||||
configGetManyStub.resolves(
|
||||
_.omit(testDeviceConf, ['deviceId', 'disableHardwareMetrics']) as any,
|
||||
_.omit(testDeviceConf, ['deviceId', 'hardwareMetrics']) as any,
|
||||
);
|
||||
});
|
||||
|
||||
@ -411,7 +414,7 @@ describe('device-state/current-state', () => {
|
||||
configGetManyStub.resolves(
|
||||
_.omit(testDeviceConf, [
|
||||
'deviceId',
|
||||
'disableHardwareMetrics',
|
||||
'hardwareMetrics',
|
||||
'uuid',
|
||||
'apiEndpoint',
|
||||
]) as any,
|
||||
@ -455,19 +458,128 @@ describe('device-state/current-state', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe.skip('reportCurrentState', () => {
|
||||
const reportPending = currentState.__get__('reportPending');
|
||||
describe('reportCurrentState', () => {
|
||||
const reportCurrentState = currentState.__get__('reportCurrentState');
|
||||
const report = currentState.__get__('report');
|
||||
let testHardwareMetrics = true;
|
||||
const testAppUpdatePollInterval = 1000;
|
||||
|
||||
const unhandledRejectionHandler = () => {
|
||||
/* noop */
|
||||
};
|
||||
|
||||
before(() => {
|
||||
stub(deviceState, 'getStatus').resolves(testCurrentState as any);
|
||||
stub(config, 'get').resolves(true);
|
||||
stub(deviceState, 'getStatus').resolves({});
|
||||
stub(sysInfo, 'getSystemMetrics').resolves();
|
||||
stub(sysInfo, 'getSystemChecks').resolves();
|
||||
stub(eventTracker, 'track');
|
||||
stub(config, 'get');
|
||||
(config.get as SinonStub).callsFake((conf) =>
|
||||
conf === 'hardwareMetrics'
|
||||
? Promise.resolve(testHardwareMetrics)
|
||||
: Promise.resolve(testAppUpdatePollInterval),
|
||||
);
|
||||
// We need to stub this rejection so that reportCurrentState doesn't keep calling itself
|
||||
stub(config, 'getMany').rejects();
|
||||
// We also need to stub this rejection because it's called right before
|
||||
// reportCurrentState in the catch, to prevent more calls of reportCurrentState
|
||||
stub(Bluebird, 'delay').rejects();
|
||||
});
|
||||
|
||||
after(() => {
|
||||
(deviceState.getStatus as SinonStub).restore();
|
||||
(sysInfo.getSystemMetrics as SinonStub).restore();
|
||||
(sysInfo.getSystemChecks as SinonStub).restore();
|
||||
(eventTracker.track as SinonStub).restore();
|
||||
(config.get as SinonStub).restore();
|
||||
(config.getMany as SinonStub).restore();
|
||||
(Bluebird.delay as SinonStub).restore();
|
||||
});
|
||||
|
||||
it('does not report if current state has not changed');
|
||||
beforeEach(() => {
|
||||
resetGlobalStateObjects();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
// Clear the throttle time limit between tests
|
||||
report.cancel();
|
||||
});
|
||||
|
||||
it('does not report if current state has not changed', async () => {
|
||||
// Use a temporary unhandledRejectionHandler to catch the promise
|
||||
// rejection from the Bluebird.delay stub
|
||||
process.on('unhandledRejection', unhandledRejectionHandler);
|
||||
spy(_, 'size');
|
||||
|
||||
reportCurrentState();
|
||||
|
||||
// Wait 200ms for anonymous async IIFE inside reportCurrentState to finish executing
|
||||
// TODO: is there a better way to test this? Possible race condition
|
||||
await sleep(200);
|
||||
|
||||
expect(stateForReport).to.deep.equal({ local: {}, dependent: {} });
|
||||
expect(_.size as SinonSpy).to.have.returned(0);
|
||||
|
||||
(_.size as SinonSpy).restore();
|
||||
process.removeListener('unhandledRejection', unhandledRejectionHandler);
|
||||
});
|
||||
|
||||
it('sends a null patch for system metrics when HARDWARE_METRICS is false', async () => {
|
||||
// Use a temporary unhandledRejectionHandler to catch the promise
|
||||
// rejection from the Bluebird.delay stub
|
||||
process.on('unhandledRejection', unhandledRejectionHandler);
|
||||
|
||||
testHardwareMetrics = false;
|
||||
|
||||
(sysInfo.getSystemMetrics as SinonStub).resolves({ cpu_usage: 20 });
|
||||
(sysInfo.getSystemChecks as SinonStub).resolves({
|
||||
is_undervolted: false,
|
||||
});
|
||||
|
||||
reportCurrentState();
|
||||
|
||||
await sleep(200);
|
||||
|
||||
expect(stateForReport).to.deep.equal({
|
||||
local: {
|
||||
is_undervolted: false,
|
||||
cpu_usage: null,
|
||||
memory_usage: null,
|
||||
memory_total: null,
|
||||
storage_usage: null,
|
||||
storage_total: null,
|
||||
storage_block_device: null,
|
||||
cpu_temp: null,
|
||||
cpu_id: null,
|
||||
},
|
||||
dependent: {},
|
||||
});
|
||||
|
||||
process.removeListener('unhandledRejection', unhandledRejectionHandler);
|
||||
});
|
||||
|
||||
it('reports both system metrics and system checks when HARDWARE_METRICS is true', async () => {
|
||||
// Use a temporary unhandledRejectionHandler to catch the promise
|
||||
// rejection from the Bluebird.delay stub
|
||||
process.on('unhandledRejection', unhandledRejectionHandler);
|
||||
|
||||
testHardwareMetrics = true;
|
||||
|
||||
(sysInfo.getSystemMetrics as SinonStub).resolves({ cpu_usage: 20 });
|
||||
(sysInfo.getSystemChecks as SinonStub).resolves({
|
||||
is_undervolted: false,
|
||||
});
|
||||
|
||||
reportCurrentState();
|
||||
|
||||
await sleep(200);
|
||||
|
||||
expect(stateForReport).to.deep.equal({
|
||||
local: { is_undervolted: false, cpu_usage: 20 },
|
||||
dependent: {},
|
||||
});
|
||||
|
||||
process.removeListener('unhandledRejection', unhandledRejectionHandler);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@ -62,6 +62,20 @@ describe('System information', () => {
|
||||
sysInfo.isSignificantChange('memory_usage', undefined, 5),
|
||||
).to.equal(true);
|
||||
});
|
||||
|
||||
it('should not filter if the current value is null', () => {
|
||||
// When the current value is null, we're sending a null patch to the
|
||||
// API in response to setting HARDWARE_METRICS to false, so
|
||||
// we need to include null for all values. None of the individual metrics
|
||||
// in systemMetrics return null (only number/undefined), so the only
|
||||
// reason for current to be null is when a null patch is happening.
|
||||
expect(sysInfo.isSignificantChange('cpu_usage', 15, null as any)).to.be
|
||||
.true;
|
||||
expect(sysInfo.isSignificantChange('cpu_temp', 55, null as any)).to.be
|
||||
.true;
|
||||
expect(sysInfo.isSignificantChange('memory_usage', 760, null as any)).to
|
||||
.be.true;
|
||||
});
|
||||
});
|
||||
|
||||
describe('CPU information', () => {
|
||||
|
Loading…
Reference in New Issue
Block a user