mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2024-12-20 06:07:57 +00:00
Merge pull request #1484 from balena-io/fixup-sysinfo-reporting
Attempt a state report once every maxReportFrequency
This commit is contained in:
commit
ef9933fc8e
@ -5,7 +5,7 @@ import * as express from 'express';
|
||||
import { isLeft } from 'fp-ts/lib/Either';
|
||||
import * as t from 'io-ts';
|
||||
import * as _ from 'lodash';
|
||||
import { PinejsClientRequest, StatusError } from 'pinejs-client-request';
|
||||
import { PinejsClientRequest } from 'pinejs-client-request';
|
||||
import * as url from 'url';
|
||||
import * as deviceRegister from './lib/register-device';
|
||||
|
||||
@ -30,17 +30,10 @@ import * as logger from './logger';
|
||||
|
||||
import * as apiHelper from './lib/api-helper';
|
||||
import { Device } from './lib/api-helper';
|
||||
import { DeviceStatus } from './types/state';
|
||||
import constants = require('./lib/constants');
|
||||
|
||||
// The exponential backoff starts at 15s
|
||||
const MINIMUM_BACKOFF_DELAY = 15000;
|
||||
|
||||
const INTERNAL_STATE_KEYS = [
|
||||
'update_pending',
|
||||
'update_downloaded',
|
||||
'update_failed',
|
||||
];
|
||||
import {
|
||||
startReporting,
|
||||
stateReportErrors,
|
||||
} from './device-state/current-state';
|
||||
|
||||
interface DevicePinInfo {
|
||||
app: number;
|
||||
@ -53,16 +46,6 @@ interface DeviceTag {
|
||||
value: string;
|
||||
}
|
||||
|
||||
const lastReportedState: DeviceStatus = {
|
||||
local: {},
|
||||
dependent: {},
|
||||
};
|
||||
const stateForReport: DeviceStatus = {
|
||||
local: {},
|
||||
dependent: {},
|
||||
};
|
||||
let reportPending = false;
|
||||
export let stateReportErrors = 0;
|
||||
let readyForUpdates = false;
|
||||
|
||||
export async function healthcheck() {
|
||||
@ -277,19 +260,12 @@ export function startCurrentStateReport() {
|
||||
'Trying to start state reporting without initializing API client',
|
||||
);
|
||||
}
|
||||
deviceState.on('change', () => {
|
||||
if (!reportPending) {
|
||||
// A latency of 100ms should be acceptable and
|
||||
// allows avoiding catching docker at weird states
|
||||
reportCurrentState();
|
||||
}
|
||||
});
|
||||
reportCurrentState();
|
||||
startReporting();
|
||||
}
|
||||
|
||||
export async function fetchDeviceTags(): Promise<DeviceTag[]> {
|
||||
if (balenaApi == null) {
|
||||
throw new Error(
|
||||
throw new InternalInconsistencyError(
|
||||
'Attempt to communicate with API, without initialized client',
|
||||
);
|
||||
}
|
||||
@ -326,160 +302,6 @@ export async function fetchDeviceTags(): Promise<DeviceTag[]> {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Computes the part of `stateForReport` that has not been reported yet.
 *
 * A field is kept when its value is not deeply equal to the value stored
 * under the same key in `lastReportedState`. Keys listed in
 * `INTERNAL_STATE_KEYS` are always dropped, and a section (`local`,
 * `dependent`) that ends up empty is removed from the result altogether.
 *
 * @returns the changed fields, grouped by section
 * @throws InternalInconsistencyError if `lastReportedState` is missing its
 * `local` or its `dependent` section
 */
function getStateDiff(): DeviceStatus {
	const {
		local: reportedLocal,
		dependent: reportedDependent,
	} = lastReportedState;
	if (reportedLocal == null || reportedDependent == null) {
		const dump = JSON.stringify(lastReportedState);
		throw new InternalInconsistencyError(
			`No local or dependent component of lastReportedLocal in ApiBinder.getStateDiff: ${dump}`,
		);
	}

	// Fields of the local section whose value differs from the last report
	const changedLocal = _(stateForReport.local)
		.omitBy((current, field: keyof DeviceStatus['local']) =>
			_.isEqual(reportedLocal[field], current),
		)
		.omit(INTERNAL_STATE_KEYS)
		.value();

	// Same comparison for the dependent section
	const changedDependent = _(stateForReport.dependent)
		.omitBy((current, field: keyof DeviceStatus['dependent']) =>
			_.isEqual(reportedDependent[field], current),
		)
		.omit(INTERNAL_STATE_KEYS)
		.value();

	// Drop sections that have nothing left to report
	return _.omitBy(
		{ local: changedLocal, dependent: changedDependent },
		_.isEmpty,
	);
}
|
||||
|
||||
/**
 * Sends a state diff to the API as a PATCH to `/device/v2/<uuid>/state`,
 * resolved against `conf.apiEndpoint`.
 *
 * When `conf.localMode` is set, the diff is first reduced with
 * `stripDeviceStateInLocalMode`; if nothing is left in its `local` section,
 * no request is made.
 *
 * @param stateDiff the fields to report
 * @param conf endpoint, device uuid and local mode flag to use for the request
 * @throws InternalInconsistencyError if `balenaApi` has not been initialised
 */
async function sendReportPatch(
	stateDiff: DeviceStatus,
	conf: { apiEndpoint: string; uuid: string; localMode: boolean },
) {
	if (balenaApi == null) {
		throw new InternalInconsistencyError(
			'Attempt to send report patch without an API client',
		);
	}

	const { apiEndpoint, uuid, localMode } = conf;

	const body = localMode ? stripDeviceStateInLocalMode(stateDiff) : stateDiff;
	// In local mode, check if it still makes sense to send any updates after data strip.
	if (localMode && _.isEmpty(body.local)) {
		// Nothing to send.
		return;
	}

	// The client's passthrough options are layered on top of the request basics
	const requestParams = _.extend(
		{
			method: 'PATCH',
			url: url.resolve(apiEndpoint, `/device/v2/${uuid}/state`),
			body,
		},
		balenaApi.passthrough,
	);

	await balenaApi._request(requestParams);
}
|
||||
|
||||
/**
 * Returns an object that contains only the status fields relevant for
 * local mode, i.e. the `local` section without the information about
 * applications state (`apps`, `is_on__commit`, `logs_channel`).
 *
 * The returned `local` section is a deep copy; no `dependent` section is
 * included.
 *
 * @param state the device status to reduce
 * @returns a new status object holding only the reduced `local` section
 */
export function stripDeviceStateInLocalMode(state: DeviceStatus): DeviceStatus {
	const localWithoutApps = _.omit(
		state.local,
		'apps',
		'is_on__commit',
		'logs_channel',
	);
	return { local: _.cloneDeep(localWithoutApps) };
}
|
||||
|
||||
/**
 * Reports the current state diff to the API, throttled with
 * `constants.maxReportFrequency` as the wait.
 *
 * Resolves with `0` without contacting the API when there is nothing to
 * report. After a successful send, `stateReportErrors` is reset and the
 * sent fields are merged into `lastReportedState`. A `StatusError` from the
 * API is logged and swallowed; any other failure (including the
 * `apiTimeout` expiring) is rethrown to the caller.
 *
 * @throws InternalInconsistencyError if the config has no `uuid` or no
 * `apiEndpoint`
 */
const report = _.throttle(async () => {
	const pendingDiff = getStateDiff();
	if (_.size(pendingDiff) === 0) {
		return 0;
	}

	const { apiEndpoint, apiTimeout, uuid, localMode } = await config.getMany([
		'deviceId',
		'apiTimeout',
		'apiEndpoint',
		'uuid',
		'localMode',
	]);

	if (uuid == null || apiEndpoint == null) {
		throw new InternalInconsistencyError(
			'No uuid or apiEndpoint provided to ApiBinder.report',
		);
	}

	try {
		const patch = sendReportPatch(pendingDiff, {
			apiEndpoint,
			uuid,
			localMode,
		});
		await Bluebird.resolve(patch).timeout(apiTimeout);

		// The API accepted the diff: remember it as reported
		stateReportErrors = 0;
		_.assign(lastReportedState.local, pendingDiff.local);
		_.assign(lastReportedState.dependent, pendingDiff.dependent);
	} catch (e) {
		if (!(e instanceof StatusError)) {
			throw e;
		}
		// We don't want this to be classed as a report error, as this will cause
		// the watchdog to kill the supervisor - and killing the supervisor will
		// not help in this situation
		log.error(
			`Non-200 response from the API! Status code: ${e.statusCode} - message:`,
			e,
		);
	}
}, constants.maxReportFrequency);
|
||||
|
||||
/**
 * Starts a report cycle in the background and returns immediately.
 *
 * The cycle sets `reportPending`, copies the status from
 * `deviceState.getStatus()` into `stateForReport` and, while there is a
 * diff against the last reported state, calls `report()` and starts over.
 * `reportPending` is cleared once there is nothing left to report.
 *
 * On failure the error is tracked and the cycle is retried after a delay of
 * `2 ** stateReportErrors * MINIMUM_BACKOFF_DELAY`, capped at the
 * `appUpdatePollInterval` config value; `stateReportErrors` is incremented
 * before waiting.
 *
 * @returns always `null`; the outcome of the cycle is not awaited
 */
function reportCurrentState(): null {
	const runCycle = async () => {
		reportPending = true;
		try {
			const status = await deviceState.getStatus();
			_.assign(stateForReport.local, status.local);
			_.assign(stateForReport.dependent, status.dependent);

			const nothingToReport = _.size(getStateDiff()) === 0;
			if (nothingToReport) {
				reportPending = false;
				return null;
			}

			await report();
			// Go around again in case the state changed while reporting
			reportCurrentState();
		} catch (e) {
			eventTracker.track('Device state report failure', { error: e });
			// We use the poll interval as the upper limit of
			// the exponential backoff
			const backoffCeiling = await config.get('appUpdatePollInterval');
			const backoffMs = Math.min(
				MINIMUM_BACKOFF_DELAY * 2 ** stateReportErrors,
				backoffCeiling,
			);

			stateReportErrors += 1;
			await Bluebird.delay(backoffMs);
			reportCurrentState();
		}
	};

	runCycle();
	return null;
}
|
||||
|
||||
async function pinDevice({ app, commit }: DevicePinInfo) {
|
||||
if (balenaApi == null) {
|
||||
throw new InternalInconsistencyError(
|
||||
|
@ -243,12 +243,16 @@ const reportCurrentState = (): null => {
|
||||
};
|
||||
|
||||
export const startReporting = () => {
|
||||
deviceState.on('change', () => {
|
||||
const doReport = () => {
|
||||
if (!reportPending) {
|
||||
// A latency of 100ms should be acceptable and
|
||||
// allows avoiding catching docker at weird states
|
||||
reportCurrentState();
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
// If the state changes, report it
|
||||
deviceState.on('change', doReport);
|
||||
// But check once every max report frequency to ensure that changes in system
|
||||
// info are picked up (CPU temp etc)
|
||||
setInterval(doReport, constants.maxReportFrequency);
|
||||
return reportCurrentState();
|
||||
};
|
||||
|
@ -447,6 +447,8 @@ describe('ApiBinder', () => {
|
||||
});
|
||||
|
||||
it('fails when stateReportHealthy is false', async () => {
|
||||
const currentState = await import('../src/device-state/current-state');
|
||||
|
||||
configStub.resolves({
|
||||
unmanaged: false,
|
||||
appUpdatePollInterval: 1000,
|
||||
@ -457,29 +459,31 @@ describe('ApiBinder', () => {
|
||||
(TargetState as any).lastFetch = process.hrtime();
|
||||
|
||||
// Copy previous values to restore later
|
||||
const previousStateReportErrors = components.apiBinder.stateReportErrors;
|
||||
const previousStateReportErrors = currentState.stateReportErrors;
|
||||
const previousDeviceStateConnected =
|
||||
// @ts-ignore
|
||||
components.deviceState.connected;
|
||||
|
||||
// Set additional conditions not in configStub to cause a fail
|
||||
components.apiBinder.stateReportErrors = 4;
|
||||
components.deviceState.connected = true;
|
||||
try {
|
||||
currentState.stateReportErrors = 4;
|
||||
components.deviceState.connected = true;
|
||||
|
||||
expect(await components.apiBinder.healthcheck()).to.equal(false);
|
||||
expect(await components.apiBinder.healthcheck()).to.equal(false);
|
||||
|
||||
expect(Log.info).to.be.calledOnce;
|
||||
expect((Log.info as SinonSpy).lastCall?.lastArg).to.equal(
|
||||
stripIndent`
|
||||
Healthcheck failure - At least ONE of the following conditions must be true:
|
||||
- No connectivityCheckEnabled ? false
|
||||
- device state is disconnected ? false
|
||||
- stateReportErrors less then 3 ? false`,
|
||||
);
|
||||
|
||||
// Restore previous values
|
||||
components.apiBinder.stateReportErrors = previousStateReportErrors;
|
||||
components.deviceState.connected = previousDeviceStateConnected;
|
||||
expect(Log.info).to.be.calledOnce;
|
||||
expect((Log.info as SinonSpy).lastCall?.lastArg).to.equal(
|
||||
stripIndent`
|
||||
Healthcheck failure - At least ONE of the following conditions must be true:
|
||||
- No connectivityCheckEnabled ? false
|
||||
- device state is disconnected ? false
|
||||
- stateReportErrors less then 3 ? false`,
|
||||
);
|
||||
} finally {
|
||||
// Restore previous values
|
||||
currentState.stateReportErrors = previousStateReportErrors;
|
||||
components.deviceState.connected = previousDeviceStateConnected;
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user