From d440776881b3925e50fbaefd441d524f5169c385 Mon Sep 17 00:00:00 2001
From: Christina Ying Wang
Date: Wed, 8 Nov 2023 16:00:54 -0800
Subject: [PATCH 1/2] Convert current state types to io-ts

Signed-off-by: Christina Ying Wang
---
 src/types/state.ts | 131 +++++++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 57 deletions(-)

diff --git a/src/types/state.ts b/src/types/state.ts
index 23956daa..609a05ca 100644
--- a/src/types/state.ts
+++ b/src/types/state.ts
@@ -54,63 +54,6 @@ export interface DeviceLegacyState {
 	commit?: string;
 }
 
-export type ServiceState = {
-	image: string;
-	status: string;
-	download_progress?: number | null;
-};
-
-export type ReleaseState = {
-	services: {
-		[serviceName: string]: ServiceState;
-	};
-};
-
-export type ReleasesState = {
-	[releaseUuid: string]: ReleaseState;
-};
-
-export type AppState = {
-	release_uuid?: string;
-	releases: ReleasesState;
-};
-
-export type DeviceReport = {
-	name?: string;
-	status?: string;
-	os_version?: string | null; // TODO: Should these purely come from the os app?
-	os_variant?: string | null; // TODO: Should these purely come from the os app?
-	supervisor_version?: string; // TODO: Should this purely come from the supervisor app?
-	provisioning_progress?: number | null; // TODO: should this be reported as part of the os app?
-	provisioning_state?: string; // TODO: should this be reported as part of the os app?
-	ip_address?: string;
-	mac_address?: string | null;
-	api_port?: number; // TODO: should this be reported as part of the supervisor app?
-	api_secret?: string | null; // TODO: should this be reported as part of the supervisor app?
-	logs_channel?: string | null; // TODO: should this be reported as part of the supervisor app? or should it not be reported anymore at all?
-	memory_usage?: number;
-	memory_total?: number;
-	storage_block_device?: string;
-	storage_usage?: number;
-	storage_total?: number;
-	cpu_temp?: number;
-	cpu_usage?: number;
-	cpu_id?: string;
-	is_undervolted?: boolean;
-	// TODO: these are ignored by the API but are used by supervisor local API
-	update_failed?: boolean;
-	update_pending?: boolean;
-	update_downloaded?: boolean;
-};
-
-export type DeviceState = {
-	[deviceUuid: string]: DeviceReport & {
-		apps?: {
-			[appUuid: string]: AppState;
-		};
-	};
-};
-
 // Return a type with a default value
 export const withDefault = <T extends t.Any>(
 	type: T,
@@ -162,6 +105,80 @@ const fromType = (name: string) =>
 // Alias short string to UUID so code reads more clearly
 export const UUID = ShortString;
 
+/*****************
+ * Current state *
+ *****************/
+const ServiceState = t.intersection([
+	t.type({
+		image: t.string,
+		status: t.string,
+	}),
+	t.partial({
+		download_progress: t.union([t.number, t.null]),
+	}),
+]);
+export type ServiceState = t.TypeOf<typeof ServiceState>;
+
+const ReleaseState = t.type({
+	services: t.record(DockerName, ServiceState),
+});
+export type ReleaseState = t.TypeOf<typeof ReleaseState>;
+
+const ReleasesState = t.record(UUID, ReleaseState);
+export type ReleasesState = t.TypeOf<typeof ReleasesState>;
+
+const AppState = t.intersection([
+	t.type({
+		releases: ReleasesState,
+	}),
+	t.partial({
+		release_uuid: UUID,
+	}),
+]);
+export type AppState = t.TypeOf<typeof AppState>;
+
+const DeviceReport = t.partial({
+	name: t.string,
+	status: t.string,
+	os_version: t.union([t.string, t.null]),
+	os_variant: t.union([t.string, t.null]),
+	supervisor_version: t.string,
+	provisioning_progress: t.union([t.number, t.null]),
+	provisioning_state: t.string,
+	ip_address: t.string,
+	mac_address: t.union([t.string, t.null]),
+	api_port: t.number,
+	api_secret: t.union([t.string, t.null]),
+	logs_channel: t.union([t.string, t.null]),
+	memory_usage: t.number,
+	memory_total: t.number,
+	storage_block_device: t.string,
+	storage_usage: t.number,
+	storage_total: t.number,
+	cpu_temp: t.number,
+	cpu_usage: t.number,
+	cpu_id: t.string,
+	is_undervolted: t.boolean,
+	update_failed: t.boolean,
+	update_pending: t.boolean,
+	update_downloaded: t.boolean,
+});
+export type DeviceReport = t.TypeOf<typeof DeviceReport>;
+
+export const DeviceState = t.record(
+	UUID,
+	t.intersection([
+		DeviceReport,
+		t.partial({
+			apps: t.record(UUID, AppState),
+		}),
+	]),
+);
+export type DeviceState = t.TypeOf<typeof DeviceState>;
+
+/****************
+ * Target state *
+ ****************/
 /**
  * A target service has docker image, a set of environment variables
  * and labels as well as one or more configurations

From eb8ad11cd702e7d5ffba499c6b2d2fcadc030dd4 Mon Sep 17 00:00:00 2001
From: Christina Ying Wang
Date: Wed, 8 Nov 2023 16:51:39 -0800
Subject: [PATCH 2/2] Cache last reported current state to /mnt/root/tmp

Whenever the Supervisor reports current state, it diffs the current state
with its last reported current state. However, when the Supervisor starts up,
there is no last reported state, since that last report is stored in process
memory. Caching the last report in a location that survives Supervisor
restarts will reduce the current report bandwidth used on startup.

Change-type: patch
Signed-off-by: Christina Ying Wang
---
 src/api-binder/report.ts | 62 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/src/api-binder/report.ts b/src/api-binder/report.ts
index 41e85338..4370728c 100644
--- a/src/api-binder/report.ts
+++ b/src/api-binder/report.ts
@@ -2,11 +2,8 @@ import * as url from 'url';
 import * as _ from 'lodash';
 import { CoreOptions } from 'request';
 import { performance } from 'perf_hooks';
-
-import { withBackoff, OnFailureInfo } from '../lib/backoff';
-import { log } from '../lib/supervisor-console';
-import { InternalInconsistencyError, StatusError } from '../lib/errors';
-import { getRequestInstance } from '../lib/request';
+import { setTimeout } from 'timers/promises';
+import { readFile } from 'fs/promises';
 
 import { DeviceState } from '../types';
 import * as config from '../config';
@@ -14,8 +11,13 @@ import { SchemaTypeKey, SchemaReturn } from '../config/schema-type';
 import * as eventTracker from '../event-tracker';
 import * as deviceState from '../device-state';
 
+import { withBackoff, OnFailureInfo } from '../lib/backoff';
+import { log } from '../lib/supervisor-console';
+import { InternalInconsistencyError, StatusError } from '../lib/errors';
+import { getRequestInstance } from '../lib/request';
 import { shallowDiff, prune, empty } from '../lib/json';
-import { setTimeout } from 'timers/promises';
+import { pathOnRoot } from '../lib/host-utils';
+import { touch, writeAndSyncFile } from '../lib/fs-utils';
 
 let lastReport: DeviceState = {};
 let lastReportTime: number = -Infinity;
@@ -26,6 +28,8 @@ const maxReportFrequency = 10 * 1000;
 // How often can we report metrics to the server in ms; mirrors server setting.
 // Metrics are low priority, so less frequent than maxReportFrequency.
 const maxMetricsFrequency = 300 * 1000;
+// Path of the cache for last reported state
+const CACHE_PATH = pathOnRoot('/tmp/balena-supervisor/state-report-cache');
 
 // TODO: This counter is read by the healthcheck to see if the
 // supervisor is having issues to connect. We have removed the
@@ -109,6 +113,12 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
 		await report({ body: stateDiff, opts });
 		lastReportTime = performance.now();
 		lastReport = currentState;
+
+		// Cache last report so it survives Supervisor restart.
+		// On Supervisor startup, Supervisor will be able to diff between the
+		// cached report and thereby report less unnecessary data.
+		await cache(currentState);
+
 		log.info('Reported current state to the cloud');
 	};
 
@@ -128,6 +138,43 @@ async function reportCurrentState(opts: StateReportOpts, uuid: string) {
 	}
 }
 
+/**
+ * Cache last reported current state to CACHE_PATH
+ */
+async function cache(state: DeviceState) {
+	try {
+		await writeAndSyncFile(CACHE_PATH, JSON.stringify(state));
+	} catch (e: unknown) {
+		log.debug(`Failed to cache last reported state: ${(e as Error).message}`);
+	}
+}
+
+/**
+ * Get last cached state report from CACHE_PATH
+ */
+async function getCache(): Promise<DeviceState> {
+	try {
+		// Touch the file, which will create it if it doesn't exist
+		await touch(CACHE_PATH);
+
+		// Get last reported current state
+		const rawStateCache = await readFile(CACHE_PATH, 'utf-8');
+		const state = JSON.parse(rawStateCache);
+
+		// Return current state cache if valid
+		if (!DeviceState.is(state)) {
+			throw new Error();
+		}
+		log.debug('Retrieved last reported state from cache');
+		return state;
+	} catch {
+		log.debug(
+			'Could not retrieve last reported state from cache, proceeding with empty cache',
+		);
+		return {};
+	}
+}
+
 function handleRetry(retryInfo: OnFailureInfo) {
 	if (retryInfo.error instanceof StatusError) {
 		// We don't want these errors to be classed as a report error, as this will cause
@@ -166,6 +213,9 @@ export async function startReporting() {
 		throw new InternalInconsistencyError('No uuid found for local device');
 	}
 
+	// Get last reported state from cache
+	lastReport = await getCache();
+
 	let reportPending = false;
 	// Reports current state if not already sending and prevents a state change
 	// from exceeding report frequency. Returns true if sent; otherwise false.