Merge pull request #2225 from balena-os/cache-last-reported-current-state

Cache last reported current state
This commit is contained in:
flowzone-app[bot] 2023-11-22 00:31:26 +00:00 committed by GitHub
commit b53bd31332
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 130 additions and 63 deletions

View File

@ -2,11 +2,8 @@ import * as url from 'url';
import * as _ from 'lodash';
import { CoreOptions } from 'request';
import { performance } from 'perf_hooks';
import { withBackoff, OnFailureInfo } from '../lib/backoff';
import { log } from '../lib/supervisor-console';
import { InternalInconsistencyError, StatusError } from '../lib/errors';
import { getRequestInstance } from '../lib/request';
import { setTimeout } from 'timers/promises';
import { readFile } from 'fs/promises';
import { DeviceState } from '../types';
import * as config from '../config';
@ -14,8 +11,13 @@ import { SchemaTypeKey, SchemaReturn } from '../config/schema-type';
import * as eventTracker from '../event-tracker';
import * as deviceState from '../device-state';
import { withBackoff, OnFailureInfo } from '../lib/backoff';
import { log } from '../lib/supervisor-console';
import { InternalInconsistencyError, StatusError } from '../lib/errors';
import { getRequestInstance } from '../lib/request';
import { shallowDiff, prune, empty } from '../lib/json';
import { setTimeout } from 'timers/promises';
import { pathOnRoot } from '../lib/host-utils';
import { touch, writeAndSyncFile } from '../lib/fs-utils';
// Last state that was reported. startReporting() seeds it from getCache(),
// and it is replaced with the current state after each report completes.
let lastReport: DeviceState = {};
// performance.now() timestamp taken right after the last report completed;
// stays at -Infinity until the first report has been sent.
let lastReportTime: number = -Infinity;
@ -26,6 +28,8 @@ const maxReportFrequency = 10 * 1000;
// How often can we report metrics to the server in ms; mirrors server setting.
// Metrics are low priority, so less frequent than maxReportFrequency.
const maxMetricsFrequency = 300 * 1000;
// Path of the cache for last reported state. Written by cache() after a
// report and read back by getCache().
const CACHE_PATH = pathOnRoot('/tmp/balena-supervisor/state-report-cache');
// TODO: This counter is read by the healthcheck to see if the
// supervisor is having issues connecting. We have removed the
@ -109,6 +113,12 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
await report({ body: stateDiff, opts });
lastReportTime = performance.now();
lastReport = currentState;
// Cache last report so it survives Supervisor restart.
// On Supervisor startup, Supervisor will be able to diff between the
// cached report and thereby report less unnecessary data.
await cache(currentState);
log.info('Reported current state to the cloud');
};
@ -128,6 +138,43 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
}
}
/**
 * Cache last reported current state to CACHE_PATH.
 *
 * This is best effort: a failed write is logged at debug level and never
 * rethrown, so callers do not need to guard against caching errors.
 *
 * @param state - the current state to serialize as JSON into the cache file
 */
async function cache(state: DeviceState): Promise<void> {
	try {
		await writeAndSyncFile(CACHE_PATH, JSON.stringify(state));
	} catch (e: unknown) {
		// Anything can be thrown, so only read `.message` from real Errors;
		// otherwise fall back to the value's string form rather than "undefined".
		const reason = e instanceof Error ? e.message : String(e);
		log.debug(`Failed to cache last reported state: ${reason}`);
	}
}
/**
 * Load the last reported state from CACHE_PATH.
 *
 * Any failure (the file cannot be touched or read, the content is not JSON,
 * or the parsed value is not a DeviceState) results in an empty state being
 * returned, so this function does not reject on cache problems.
 */
async function getCache(): Promise<DeviceState> {
	try {
		// Make sure the cache file exists before reading it
		await touch(CACHE_PATH);

		const cached: unknown = JSON.parse(await readFile(CACHE_PATH, 'utf-8'));

		// Only hand back the cached value if it has the expected shape
		if (DeviceState.is(cached)) {
			log.debug('Retrieved last reported state from cache');
			return cached;
		}
	} catch {
		// Handled below together with the invalid-content case
	}

	log.debug(
		'Could not retrieve last reported state from cache, proceeding with empty cache',
	);
	return {};
}
function handleRetry(retryInfo: OnFailureInfo) {
if (retryInfo.error instanceof StatusError) {
// We don't want these errors to be classed as a report error, as this will cause
@ -166,6 +213,9 @@ export async function startReporting() {
throw new InternalInconsistencyError('No uuid found for local device');
}
// Get last reported state from cache
lastReport = await getCache();
let reportPending = false;
// Reports current state if not already sending and prevents a state change
// from exceeding report frequency. Returns true if sent; otherwise false.

View File

@ -54,63 +54,6 @@ export interface DeviceLegacyState {
commit?: string;
}
/**
 * Reported state of a single service: required `image` and `status` strings,
 * plus an optional `download_progress` that may be a number or null.
 */
export type ServiceState = {
image: string;
status: string;
download_progress?: number | null;
};
/**
 * Reported state of a release: its services, keyed by service name.
 */
export type ReleaseState = {
services: {
[serviceName: string]: ServiceState;
};
};
/**
 * Map of release uuid to the reported state of that release.
 */
export type ReleasesState = {
[releaseUuid: string]: ReleaseState;
};
/**
 * Reported state of an app: a required map of `releases` and an optional
 * `release_uuid`.
 */
export type AppState = {
release_uuid?: string;
releases: ReleasesState;
};
/**
 * Device-level fields of a state report. Every field is optional, and
 * several of them additionally accept null.
 */
export type DeviceReport = {
name?: string;
status?: string;
os_version?: string | null; // TODO: Should these purely come from the os app?
os_variant?: string | null; // TODO: Should these purely come from the os app?
supervisor_version?: string; // TODO: Should this purely come from the supervisor app?
provisioning_progress?: number | null; // TODO: should this be reported as part of the os app?
provisioning_state?: string; // TODO: should this be reported as part of the os app?
ip_address?: string;
mac_address?: string | null;
api_port?: number; // TODO: should this be reported as part of the supervisor app?
api_secret?: string | null; // TODO: should this be reported as part of the supervisor app?
logs_channel?: string | null; // TODO: should this be reported as part of the supervisor app? or should it not be reported anymore at all?
memory_usage?: number;
memory_total?: number;
storage_block_device?: string;
storage_usage?: number;
storage_total?: number;
cpu_temp?: number;
cpu_usage?: number;
cpu_id?: string;
is_undervolted?: boolean;
// TODO: these are ignored by the API but are used by supervisor local API
update_failed?: boolean;
update_pending?: boolean;
update_downloaded?: boolean;
};
/**
 * Full current-state report: keyed by device uuid, each entry carries the
 * DeviceReport fields plus an optional `apps` map keyed by app uuid.
 */
export type DeviceState = {
[deviceUuid: string]: DeviceReport & {
apps?: {
[appUuid: string]: AppState;
};
};
};
// Return a type with a default value
export const withDefault = <T extends t.Any>(
type: T,
@ -162,6 +105,80 @@ const fromType = <T extends object>(name: string) =>
// Alias short string to UUID so code reads more clearly
export const UUID = ShortString;
/*****************
* Current state *
*****************/
// Runtime codec for the reported state of a single service (presumably io-ts,
// via `t` — confirm against the file's imports). `image` and `status` are
// declared with t.type, `download_progress` with t.partial and it may be null.
const ServiceState = t.intersection([
t.type({
image: t.string,
status: t.string,
}),
t.partial({
download_progress: t.union([t.number, t.null]),
}),
]);
// Static type derived from the codec so the two cannot drift apart
export type ServiceState = t.TypeOf<typeof ServiceState>;
// Codec for the reported state of a release: a `services` record whose keys
// are validated with DockerName and whose values are ServiceState.
const ReleaseState = t.type({
services: t.record(DockerName, ServiceState),
});
// Static type derived from the codec
export type ReleaseState = t.TypeOf<typeof ReleaseState>;
// Codec for a record of releases: keys validated with UUID, values are
// ReleaseState.
const ReleasesState = t.record(UUID, ReleaseState);
// Static type derived from the codec
export type ReleasesState = t.TypeOf<typeof ReleasesState>;
// Codec for the reported state of an app: `releases` is declared with t.type,
// `release_uuid` with t.partial and it is validated with UUID when present.
const AppState = t.intersection([
t.type({
releases: ReleasesState,
}),
t.partial({
release_uuid: UUID,
}),
]);
// Static type derived from the codec
export type AppState = t.TypeOf<typeof AppState>;
// Codec for the device-level fields of a state report. Everything is declared
// through t.partial, and several fields additionally accept null.
const DeviceReport = t.partial({
name: t.string,
status: t.string,
os_version: t.union([t.string, t.null]),
os_variant: t.union([t.string, t.null]),
supervisor_version: t.string,
provisioning_progress: t.union([t.number, t.null]),
provisioning_state: t.string,
ip_address: t.string,
mac_address: t.union([t.string, t.null]),
api_port: t.number,
api_secret: t.union([t.string, t.null]),
logs_channel: t.union([t.string, t.null]),
memory_usage: t.number,
memory_total: t.number,
storage_block_device: t.string,
storage_usage: t.number,
storage_total: t.number,
cpu_temp: t.number,
cpu_usage: t.number,
cpu_id: t.string,
is_undervolted: t.boolean,
update_failed: t.boolean,
update_pending: t.boolean,
update_downloaded: t.boolean,
});
// Static type derived from the codec
export type DeviceReport = t.TypeOf<typeof DeviceReport>;
// Codec for a full current-state report: a record keyed by UUID whose values
// combine the DeviceReport fields with an optional `apps` record (keyed by
// UUID, values are AppState). Unlike the codecs above it is exported as a
// value, so other modules can use it for runtime validation.
export const DeviceState = t.record(
UUID,
t.intersection([
DeviceReport,
t.partial({
apps: t.record(UUID, AppState),
}),
]),
);
// Static type derived from the codec; shares its name with the codec value
export type DeviceState = t.TypeOf<typeof DeviceState>;
/****************
* Target state *
****************/
/**
* A target service has docker image, a set of environment variables
* and labels as well as one or more configurations