mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2024-12-19 05:37:53 +00:00
Merge pull request #2225 from balena-os/cache-last-reported-current-state
Cache last reported current state
This commit is contained in:
commit
b53bd31332
@ -2,11 +2,8 @@ import * as url from 'url';
|
||||
import * as _ from 'lodash';
|
||||
import { CoreOptions } from 'request';
|
||||
import { performance } from 'perf_hooks';
|
||||
|
||||
import { withBackoff, OnFailureInfo } from '../lib/backoff';
|
||||
import { log } from '../lib/supervisor-console';
|
||||
import { InternalInconsistencyError, StatusError } from '../lib/errors';
|
||||
import { getRequestInstance } from '../lib/request';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
import { readFile } from 'fs/promises';
|
||||
|
||||
import { DeviceState } from '../types';
|
||||
import * as config from '../config';
|
||||
@ -14,8 +11,13 @@ import { SchemaTypeKey, SchemaReturn } from '../config/schema-type';
|
||||
import * as eventTracker from '../event-tracker';
|
||||
import * as deviceState from '../device-state';
|
||||
|
||||
import { withBackoff, OnFailureInfo } from '../lib/backoff';
|
||||
import { log } from '../lib/supervisor-console';
|
||||
import { InternalInconsistencyError, StatusError } from '../lib/errors';
|
||||
import { getRequestInstance } from '../lib/request';
|
||||
import { shallowDiff, prune, empty } from '../lib/json';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
import { pathOnRoot } from '../lib/host-utils';
|
||||
import { touch, writeAndSyncFile } from '../lib/fs-utils';
|
||||
|
||||
let lastReport: DeviceState = {};
|
||||
let lastReportTime: number = -Infinity;
|
||||
@ -26,6 +28,8 @@ const maxReportFrequency = 10 * 1000;
|
||||
// How often can we report metrics to the server in ms; mirrors server setting.
|
||||
// Metrics are low priority, so less frequent than maxReportFrequency.
|
||||
const maxMetricsFrequency = 300 * 1000;
|
||||
// Path of the cache for last reported state
|
||||
const CACHE_PATH = pathOnRoot('/tmp/balena-supervisor/state-report-cache');
|
||||
|
||||
// TODO: This counter is read by the healthcheck to see if the
|
||||
// supervisor is having issues to connect. We have removed the
|
||||
@ -109,6 +113,12 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
|
||||
await report({ body: stateDiff, opts });
|
||||
lastReportTime = performance.now();
|
||||
lastReport = currentState;
|
||||
|
||||
// Cache last report so it survives Supervisor restart.
|
||||
// On Supervisor startup, Supervisor will be able to diff between the
|
||||
// cached report and thereby report less unnecessary data.
|
||||
await cache(currentState);
|
||||
|
||||
log.info('Reported current state to the cloud');
|
||||
};
|
||||
|
||||
@ -128,6 +138,43 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Cache last reported current state to CACHE_PATH.
 *
 * The state is serialized as JSON. This is best-effort: a failed write is
 * logged at debug level and never rethrown.
 *
 * @param state - the state to write to the cache file
 */
async function cache(state: DeviceState) {
	try {
		await writeAndSyncFile(CACHE_PATH, JSON.stringify(state));
	} catch (e: unknown) {
		// Anything can be thrown, so narrow before reading `.message`;
		// a non-Error value would otherwise be logged as "undefined".
		const reason = e instanceof Error ? e.message : String(e);
		log.debug(`Failed to cache last reported state: ${reason}`);
	}
}
|
||||
|
||||
/**
 * Get last cached state report from CACHE_PATH.
 *
 * Returns the parsed cache content when it passes the DeviceState check.
 * Any failure (unreadable file, unparsable JSON, failed check) yields an
 * empty state instead of an error.
 */
async function getCache(): Promise<DeviceState> {
	try {
		// Make sure the file exists before reading it
		await touch(CACHE_PATH);

		const contents = await readFile(CACHE_PATH, 'utf-8');
		const parsed = JSON.parse(contents);

		// Only hand back the cached value when it is a valid state report
		if (DeviceState.is(parsed)) {
			log.debug('Retrieved last reported state from cache');
			return parsed;
		}
	} catch {
		// Handled by the shared fallback below
	}

	log.debug(
		'Could not retrieve last reported state from cache, proceeding with empty cache',
	);
	return {};
}
|
||||
|
||||
function handleRetry(retryInfo: OnFailureInfo) {
|
||||
if (retryInfo.error instanceof StatusError) {
|
||||
// We don't want these errors to be classed as a report error, as this will cause
|
||||
@ -166,6 +213,9 @@ export async function startReporting() {
|
||||
throw new InternalInconsistencyError('No uuid found for local device');
|
||||
}
|
||||
|
||||
// Get last reported state from cache
|
||||
lastReport = await getCache();
|
||||
|
||||
let reportPending = false;
|
||||
// Reports current state if not already sending and prevents a state change
|
||||
// from exceeding report frequency. Returns true if sent; otherwise false.
|
||||
|
@ -54,63 +54,6 @@ export interface DeviceLegacyState {
|
||||
commit?: string;
|
||||
}
|
||||
|
||||
export type ServiceState = {
|
||||
image: string;
|
||||
status: string;
|
||||
download_progress?: number | null;
|
||||
};
|
||||
|
||||
export type ReleaseState = {
|
||||
services: {
|
||||
[serviceName: string]: ServiceState;
|
||||
};
|
||||
};
|
||||
|
||||
export type ReleasesState = {
|
||||
[releaseUuid: string]: ReleaseState;
|
||||
};
|
||||
|
||||
export type AppState = {
|
||||
release_uuid?: string;
|
||||
releases: ReleasesState;
|
||||
};
|
||||
|
||||
export type DeviceReport = {
|
||||
name?: string;
|
||||
status?: string;
|
||||
os_version?: string | null; // TODO: Should these purely come from the os app?
|
||||
os_variant?: string | null; // TODO: Should these purely come from the os app?
|
||||
supervisor_version?: string; // TODO: Should this purely come from the supervisor app?
|
||||
provisioning_progress?: number | null; // TODO: should this be reported as part of the os app?
|
||||
provisioning_state?: string; // TODO: should this be reported as part of the os app?
|
||||
ip_address?: string;
|
||||
mac_address?: string | null;
|
||||
api_port?: number; // TODO: should this be reported as part of the supervisor app?
|
||||
api_secret?: string | null; // TODO: should this be reported as part of the supervisor app?
|
||||
logs_channel?: string | null; // TODO: should this be reported as part of the supervisor app? or should it not be reported anymore at all?
|
||||
memory_usage?: number;
|
||||
memory_total?: number;
|
||||
storage_block_device?: string;
|
||||
storage_usage?: number;
|
||||
storage_total?: number;
|
||||
cpu_temp?: number;
|
||||
cpu_usage?: number;
|
||||
cpu_id?: string;
|
||||
is_undervolted?: boolean;
|
||||
// TODO: these are ignored by the API but are used by supervisor local API
|
||||
update_failed?: boolean;
|
||||
update_pending?: boolean;
|
||||
update_downloaded?: boolean;
|
||||
};
|
||||
|
||||
export type DeviceState = {
|
||||
[deviceUuid: string]: DeviceReport & {
|
||||
apps?: {
|
||||
[appUuid: string]: AppState;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
// Return a type with a default value
|
||||
export const withDefault = <T extends t.Any>(
|
||||
type: T,
|
||||
@ -162,6 +105,80 @@ const fromType = <T extends object>(name: string) =>
|
||||
// Alias short string to UUID so code reads more clearly
|
||||
export const UUID = ShortString;
|
||||
|
||||
/*****************
|
||||
* Current state *
|
||||
*****************/
|
||||
// Fields that must be present on a service state
const ServiceStateRequired = t.type({
	image: t.string,
	status: t.string,
});

// Fields that may be omitted from a service state
const ServiceStateOptional = t.partial({
	download_progress: t.union([t.number, t.null]),
});

/**
 * Runtime codec for the state of a single service: `image` and `status`
 * are mandatory, `download_progress` may be absent, a number or null.
 */
const ServiceState = t.intersection([
	ServiceStateRequired,
	ServiceStateOptional,
]);
export type ServiceState = t.TypeOf<typeof ServiceState>;
|
||||
|
||||
// Service states of one release, keyed by a DockerName
const ReleaseServices = t.record(DockerName, ServiceState);

/** Runtime codec for a single release: an object holding its `services`. */
const ReleaseState = t.type({ services: ReleaseServices });
export type ReleaseState = t.TypeOf<typeof ReleaseState>;

/** Runtime codec for a collection of releases, keyed by UUID. */
const ReleasesState = t.record(UUID, ReleaseState);
export type ReleasesState = t.TypeOf<typeof ReleasesState>;
|
||||
|
||||
// Fields that must be present on an app state
const AppStateRequired = t.type({ releases: ReleasesState });

// Fields that may be omitted from an app state
const AppStateOptional = t.partial({ release_uuid: UUID });

/**
 * Runtime codec for the state of an app: `releases` is mandatory,
 * `release_uuid` is optional.
 */
const AppState = t.intersection([AppStateRequired, AppStateOptional]);
export type AppState = t.TypeOf<typeof AppState>;
|
||||
|
||||
// Shared codecs for the fields that accept an explicit null
const NullableReportString = t.union([t.string, t.null]);
const NullableReportNumber = t.union([t.number, t.null]);

/**
 * Runtime codec for the device-level fields of a state report.
 * Every field is optional; some additionally accept null.
 */
const DeviceReport = t.partial({
	name: t.string,
	status: t.string,
	os_version: NullableReportString,
	os_variant: NullableReportString,
	supervisor_version: t.string,
	provisioning_progress: NullableReportNumber,
	provisioning_state: t.string,
	ip_address: t.string,
	mac_address: NullableReportString,
	api_port: t.number,
	api_secret: NullableReportString,
	logs_channel: NullableReportString,
	memory_usage: t.number,
	memory_total: t.number,
	storage_block_device: t.string,
	storage_usage: t.number,
	storage_total: t.number,
	cpu_temp: t.number,
	cpu_usage: t.number,
	cpu_id: t.string,
	is_undervolted: t.boolean,
	update_failed: t.boolean,
	update_pending: t.boolean,
	update_downloaded: t.boolean,
});
export type DeviceReport = t.TypeOf<typeof DeviceReport>;
|
||||
|
||||
// Optional `apps` section of a device entry, keyed by UUID
const DeviceStateApps = t.partial({
	apps: t.record(UUID, AppState),
});

// One device entry: the device report fields plus the optional apps
const DeviceStateEntry = t.intersection([DeviceReport, DeviceStateApps]);

/**
 * Runtime codec for a full state report: device entries keyed by UUID.
 * Exported as a value so callers can validate untrusted data with `.is()`.
 */
export const DeviceState = t.record(UUID, DeviceStateEntry);
export type DeviceState = t.TypeOf<typeof DeviceState>;
|
||||
|
||||
/****************
|
||||
* Target state *
|
||||
****************/
|
||||
/**
|
||||
* A target service has docker image, a set of environment variables
|
||||
* and labels as well as one or more configurations
|
||||
|
Loading…
Reference in New Issue
Block a user