mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-05-29 13:54:18 +00:00
Merge pull request #2225 from balena-os/cache-last-reported-current-state
Cache last reported current state
This commit is contained in:
commit
b53bd31332
@ -2,11 +2,8 @@ import * as url from 'url';
|
|||||||
import * as _ from 'lodash';
|
import * as _ from 'lodash';
|
||||||
import { CoreOptions } from 'request';
|
import { CoreOptions } from 'request';
|
||||||
import { performance } from 'perf_hooks';
|
import { performance } from 'perf_hooks';
|
||||||
|
import { setTimeout } from 'timers/promises';
|
||||||
import { withBackoff, OnFailureInfo } from '../lib/backoff';
|
import { readFile } from 'fs/promises';
|
||||||
import { log } from '../lib/supervisor-console';
|
|
||||||
import { InternalInconsistencyError, StatusError } from '../lib/errors';
|
|
||||||
import { getRequestInstance } from '../lib/request';
|
|
||||||
|
|
||||||
import { DeviceState } from '../types';
|
import { DeviceState } from '../types';
|
||||||
import * as config from '../config';
|
import * as config from '../config';
|
||||||
@ -14,8 +11,13 @@ import { SchemaTypeKey, SchemaReturn } from '../config/schema-type';
|
|||||||
import * as eventTracker from '../event-tracker';
|
import * as eventTracker from '../event-tracker';
|
||||||
import * as deviceState from '../device-state';
|
import * as deviceState from '../device-state';
|
||||||
|
|
||||||
|
import { withBackoff, OnFailureInfo } from '../lib/backoff';
|
||||||
|
import { log } from '../lib/supervisor-console';
|
||||||
|
import { InternalInconsistencyError, StatusError } from '../lib/errors';
|
||||||
|
import { getRequestInstance } from '../lib/request';
|
||||||
import { shallowDiff, prune, empty } from '../lib/json';
|
import { shallowDiff, prune, empty } from '../lib/json';
|
||||||
import { setTimeout } from 'timers/promises';
|
import { pathOnRoot } from '../lib/host-utils';
|
||||||
|
import { touch, writeAndSyncFile } from '../lib/fs-utils';
|
||||||
|
|
||||||
let lastReport: DeviceState = {};
|
let lastReport: DeviceState = {};
|
||||||
let lastReportTime: number = -Infinity;
|
let lastReportTime: number = -Infinity;
|
||||||
@ -26,6 +28,8 @@ const maxReportFrequency = 10 * 1000;
|
|||||||
// How often can we report metrics to the server in ms; mirrors server setting.
|
// How often can we report metrics to the server in ms; mirrors server setting.
|
||||||
// Metrics are low priority, so less frequent than maxReportFrequency.
|
// Metrics are low priority, so less frequent than maxReportFrequency.
|
||||||
const maxMetricsFrequency = 300 * 1000;
|
const maxMetricsFrequency = 300 * 1000;
|
||||||
|
// Path of the cache for last reported state
|
||||||
|
const CACHE_PATH = pathOnRoot('/tmp/balena-supervisor/state-report-cache');
|
||||||
|
|
||||||
// TODO: This counter is read by the healthcheck to see if the
|
// TODO: This counter is read by the healthcheck to see if the
|
||||||
// supervisor is having issues to connect. We have removed the
|
// supervisor is having issues to connect. We have removed the
|
||||||
@ -109,6 +113,12 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
|
|||||||
await report({ body: stateDiff, opts });
|
await report({ body: stateDiff, opts });
|
||||||
lastReportTime = performance.now();
|
lastReportTime = performance.now();
|
||||||
lastReport = currentState;
|
lastReport = currentState;
|
||||||
|
|
||||||
|
// Cache last report so it survives Supervisor restart.
|
||||||
|
// On Supervisor startup, Supervisor will be able to diff against the
|
||||||
|
// cached report and thereby report less unnecessary data.
|
||||||
|
await cache(currentState);
|
||||||
|
|
||||||
log.info('Reported current state to the cloud');
|
log.info('Reported current state to the cloud');
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -128,6 +138,43 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cache the last reported current state to CACHE_PATH.
 *
 * This is best-effort: a failed write is logged at debug level and is never
 * rethrown, so a caching problem does not fail the caller.
 *
 * @param state - state that is serialized as JSON and written to CACHE_PATH
 */
async function cache(state: DeviceState): Promise<void> {
	try {
		await writeAndSyncFile(CACHE_PATH, JSON.stringify(state));
	} catch (e: unknown) {
		// Any value can be thrown, so narrow instead of asserting that `e` is
		// an Error; otherwise the log line would read `undefined`.
		const reason = e instanceof Error ? e.message : String(e);
		log.debug(`Failed to cache last reported state: ${reason}`);
	}
}
|
||||||
|
|
||||||
|
/**
 * Get the last cached state report from CACHE_PATH.
 *
 * The cache file is touched first, which creates it if it doesn't exist.
 * Its contents are then read, parsed as JSON and validated with
 * `DeviceState.is`.
 *
 * @returns the cached state when it is valid; an empty object when the file
 * is empty, unreadable, not valid JSON, or not a valid DeviceState. This
 * function never throws.
 */
async function getCache(): Promise<DeviceState> {
	try {
		// Touch the file, which will create it if it doesn't exist
		await touch(CACHE_PATH);

		// Get last reported current state
		const rawStateCache = await readFile(CACHE_PATH, 'utf-8');

		// A file that was just created by the touch above has no content;
		// report that explicitly rather than as a JSON syntax error.
		if (rawStateCache.trim().length === 0) {
			throw new Error('cache file is empty');
		}

		// Keep the parsed value as `unknown` until the codec has validated it
		const state: unknown = JSON.parse(rawStateCache);

		// Return current state cache if valid
		if (!DeviceState.is(state)) {
			throw new Error('cached content is not a valid device state');
		}
		log.debug('Retrieved last reported state from cache');
		return state;
	} catch (e: unknown) {
		// Best-effort: fall back to an empty cache, but keep the reason visible
		const reason = e instanceof Error ? e.message : String(e);
		log.debug(
			`Could not retrieve last reported state from cache, proceeding with empty cache: ${reason}`,
		);
		return {};
	}
}
|
||||||
|
|
||||||
function handleRetry(retryInfo: OnFailureInfo) {
|
function handleRetry(retryInfo: OnFailureInfo) {
|
||||||
if (retryInfo.error instanceof StatusError) {
|
if (retryInfo.error instanceof StatusError) {
|
||||||
// We don't want these errors to be classed as a report error, as this will cause
|
// We don't want these errors to be classed as a report error, as this will cause
|
||||||
@ -166,6 +213,9 @@ export async function startReporting() {
|
|||||||
throw new InternalInconsistencyError('No uuid found for local device');
|
throw new InternalInconsistencyError('No uuid found for local device');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get last reported state from cache
|
||||||
|
lastReport = await getCache();
|
||||||
|
|
||||||
let reportPending = false;
|
let reportPending = false;
|
||||||
// Reports current state if not already sending and prevents a state change
|
// Reports current state if not already sending and prevents a state change
|
||||||
// from exceeding report frequency. Returns true if sent; otherwise false.
|
// from exceeding report frequency. Returns true if sent; otherwise false.
|
||||||
|
@ -54,63 +54,6 @@ export interface DeviceLegacyState {
|
|||||||
commit?: string;
|
commit?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type ServiceState = {
|
|
||||||
image: string;
|
|
||||||
status: string;
|
|
||||||
download_progress?: number | null;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type ReleaseState = {
|
|
||||||
services: {
|
|
||||||
[serviceName: string]: ServiceState;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
export type ReleasesState = {
|
|
||||||
[releaseUuid: string]: ReleaseState;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type AppState = {
|
|
||||||
release_uuid?: string;
|
|
||||||
releases: ReleasesState;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type DeviceReport = {
|
|
||||||
name?: string;
|
|
||||||
status?: string;
|
|
||||||
os_version?: string | null; // TODO: Should these purely come from the os app?
|
|
||||||
os_variant?: string | null; // TODO: Should these purely come from the os app?
|
|
||||||
supervisor_version?: string; // TODO: Should this purely come from the supervisor app?
|
|
||||||
provisioning_progress?: number | null; // TODO: should this be reported as part of the os app?
|
|
||||||
provisioning_state?: string; // TODO: should this be reported as part of the os app?
|
|
||||||
ip_address?: string;
|
|
||||||
mac_address?: string | null;
|
|
||||||
api_port?: number; // TODO: should this be reported as part of the supervisor app?
|
|
||||||
api_secret?: string | null; // TODO: should this be reported as part of the supervisor app?
|
|
||||||
logs_channel?: string | null; // TODO: should this be reported as part of the supervisor app? or should it not be reported anymore at all?
|
|
||||||
memory_usage?: number;
|
|
||||||
memory_total?: number;
|
|
||||||
storage_block_device?: string;
|
|
||||||
storage_usage?: number;
|
|
||||||
storage_total?: number;
|
|
||||||
cpu_temp?: number;
|
|
||||||
cpu_usage?: number;
|
|
||||||
cpu_id?: string;
|
|
||||||
is_undervolted?: boolean;
|
|
||||||
// TODO: these are ignored by the API but are used by supervisor local API
|
|
||||||
update_failed?: boolean;
|
|
||||||
update_pending?: boolean;
|
|
||||||
update_downloaded?: boolean;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type DeviceState = {
|
|
||||||
[deviceUuid: string]: DeviceReport & {
|
|
||||||
apps?: {
|
|
||||||
[appUuid: string]: AppState;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
// Return a type with a default value
|
// Return a type with a default value
|
||||||
export const withDefault = <T extends t.Any>(
|
export const withDefault = <T extends t.Any>(
|
||||||
type: T,
|
type: T,
|
||||||
@ -162,6 +105,80 @@ const fromType = <T extends object>(name: string) =>
|
|||||||
// Alias short string to UUID so code reads more clearly
|
// Alias short string to UUID so code reads more clearly
|
||||||
export const UUID = ShortString;
|
export const UUID = ShortString;
|
||||||
|
|
||||||
|
/*****************
 * Current state *
 *****************/

/**
 * Codec for the reported state of a single service: `image` and `status`
 * are required strings, `download_progress` is optional and may be a
 * number or null.
 */
const ServiceState = t.intersection([
	t.type({
		image: t.string,
		status: t.string,
	}),
	t.partial({
		download_progress: t.union([t.number, t.null]),
	}),
]);
export type ServiceState = t.TypeOf<typeof ServiceState>;

/**
 * Codec for a release: a `services` record keyed by DockerName whose values
 * are ServiceState.
 */
const ReleaseState = t.type({
	services: t.record(DockerName, ServiceState),
});
export type ReleaseState = t.TypeOf<typeof ReleaseState>;

/** Codec for a record of ReleaseState keyed by UUID. */
const ReleasesState = t.record(UUID, ReleaseState);
export type ReleasesState = t.TypeOf<typeof ReleasesState>;

/**
 * Codec for an app: `releases` is required, `release_uuid` is optional.
 */
const AppState = t.intersection([
	t.type({
		releases: ReleasesState,
	}),
	t.partial({
		release_uuid: UUID,
	}),
]);
export type AppState = t.TypeOf<typeof AppState>;

/**
 * Codec for the device-level report fields. Every field is optional
 * (`t.partial`); several of them additionally accept null.
 */
const DeviceReport = t.partial({
	name: t.string,
	status: t.string,
	os_version: t.union([t.string, t.null]),
	os_variant: t.union([t.string, t.null]),
	supervisor_version: t.string,
	provisioning_progress: t.union([t.number, t.null]),
	provisioning_state: t.string,
	ip_address: t.string,
	mac_address: t.union([t.string, t.null]),
	api_port: t.number,
	api_secret: t.union([t.string, t.null]),
	logs_channel: t.union([t.string, t.null]),
	memory_usage: t.number,
	memory_total: t.number,
	storage_block_device: t.string,
	storage_usage: t.number,
	storage_total: t.number,
	cpu_temp: t.number,
	cpu_usage: t.number,
	cpu_id: t.string,
	is_undervolted: t.boolean,
	update_failed: t.boolean,
	update_pending: t.boolean,
	update_downloaded: t.boolean,
});
export type DeviceReport = t.TypeOf<typeof DeviceReport>;

/**
 * Codec for a full current state report: a record keyed by UUID whose
 * values are a DeviceReport combined with an optional `apps` record
 * (keyed by UUID, values AppState).
 */
export const DeviceState = t.record(
	UUID,
	t.intersection([
		DeviceReport,
		t.partial({
			apps: t.record(UUID, AppState),
		}),
	]),
);
export type DeviceState = t.TypeOf<typeof DeviceState>;
|
||||||
|
|
||||||
|
/****************
|
||||||
|
* Target state *
|
||||||
|
****************/
|
||||||
/**
|
/**
|
||||||
* A target service has docker image, a set of environment variables
|
* A target service has docker image, a set of environment variables
|
||||||
* and labels as well as one or more configurations
|
* and labels as well as one or more configurations
|
||||||
|
Loading…
x
Reference in New Issue
Block a user