mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-04-19 00:26:53 +00:00
Cache last reported current state to /mnt/root/tmp
Whenever the Supervisor reports current state, it diffs the current state with its last reported current state. However, when the Supervisor starts up, there is no last reported state, since that last report is stored in process memory. Caching the last report in a location that survives Supervisor restarts will reduce the current report bandwidth used on startup. Change-type: patch Signed-off-by: Christina Ying Wang <christina@balena.io>
This commit is contained in:
parent
d440776881
commit
eb8ad11cd7
@ -2,11 +2,8 @@ import * as url from 'url';
|
||||
import * as _ from 'lodash';
|
||||
import { CoreOptions } from 'request';
|
||||
import { performance } from 'perf_hooks';
|
||||
|
||||
import { withBackoff, OnFailureInfo } from '../lib/backoff';
|
||||
import { log } from '../lib/supervisor-console';
|
||||
import { InternalInconsistencyError, StatusError } from '../lib/errors';
|
||||
import { getRequestInstance } from '../lib/request';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
import { readFile } from 'fs/promises';
|
||||
|
||||
import { DeviceState } from '../types';
|
||||
import * as config from '../config';
|
||||
@ -14,8 +11,13 @@ import { SchemaTypeKey, SchemaReturn } from '../config/schema-type';
|
||||
import * as eventTracker from '../event-tracker';
|
||||
import * as deviceState from '../device-state';
|
||||
|
||||
import { withBackoff, OnFailureInfo } from '../lib/backoff';
|
||||
import { log } from '../lib/supervisor-console';
|
||||
import { InternalInconsistencyError, StatusError } from '../lib/errors';
|
||||
import { getRequestInstance } from '../lib/request';
|
||||
import { shallowDiff, prune, empty } from '../lib/json';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
import { pathOnRoot } from '../lib/host-utils';
|
||||
import { touch, writeAndSyncFile } from '../lib/fs-utils';
|
||||
|
||||
let lastReport: DeviceState = {};
|
||||
let lastReportTime: number = -Infinity;
|
||||
@ -26,6 +28,8 @@ const maxReportFrequency = 10 * 1000;
|
||||
// How often can we report metrics to the server in ms; mirrors server setting.
|
||||
// Metrics are low priority, so less frequent than maxReportFrequency.
|
||||
const maxMetricsFrequency = 300 * 1000;
|
||||
// Path of the cache for last reported state
|
||||
const CACHE_PATH = pathOnRoot('/tmp/balena-supervisor/state-report-cache');
|
||||
|
||||
// TODO: This counter is read by the healthcheck to see if the
|
||||
// supervisor is having issues to connect. We have removed the
|
||||
@ -109,6 +113,12 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
|
||||
await report({ body: stateDiff, opts });
|
||||
lastReportTime = performance.now();
|
||||
lastReport = currentState;
|
||||
|
||||
// Cache last report so it survives Supervisor restart.
|
||||
// On Supervisor startup, Supervisor will be able to diff between the
|
||||
// cached report and thereby report less unnecessary data.
|
||||
await cache(currentState);
|
||||
|
||||
log.info('Reported current state to the cloud');
|
||||
};
|
||||
|
||||
@ -128,6 +138,43 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache last reported current state to CACHE_PATH
|
||||
*/
|
||||
async function cache(state: DeviceState) {
|
||||
try {
|
||||
await writeAndSyncFile(CACHE_PATH, JSON.stringify(state));
|
||||
} catch (e: unknown) {
|
||||
log.debug(`Failed to cache last reported state: ${(e as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get last cached state report from CACHE_PATH
|
||||
*/
|
||||
async function getCache(): Promise<DeviceState> {
|
||||
try {
|
||||
// Touch the file, which will create it if it doesn't exist
|
||||
await touch(CACHE_PATH);
|
||||
|
||||
// Get last reported current state
|
||||
const rawStateCache = await readFile(CACHE_PATH, 'utf-8');
|
||||
const state = JSON.parse(rawStateCache);
|
||||
|
||||
// Return current state cache if valid
|
||||
if (!DeviceState.is(state)) {
|
||||
throw new Error();
|
||||
}
|
||||
log.debug('Retrieved last reported state from cache');
|
||||
return state;
|
||||
} catch {
|
||||
log.debug(
|
||||
'Could not retrieve last reported state from cache, proceeding with empty cache',
|
||||
);
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
function handleRetry(retryInfo: OnFailureInfo) {
|
||||
if (retryInfo.error instanceof StatusError) {
|
||||
// We don't want these errors to be classed as a report error, as this will cause
|
||||
@ -166,6 +213,9 @@ export async function startReporting() {
|
||||
throw new InternalInconsistencyError('No uuid found for local device');
|
||||
}
|
||||
|
||||
// Get last reported state from cache
|
||||
lastReport = await getCache();
|
||||
|
||||
let reportPending = false;
|
||||
// Reports current state if not already sending and prevents a state change
|
||||
// from exceeding report frequency. Returns true if sent; otherwise false.
|
||||
|
Loading…
x
Reference in New Issue
Block a user