Mirror of https://github.com/balena-os/balena-supervisor.git, synced 2025-03-15 00:36:35 +00:00

Compare commits (38 commits, listed by SHA1):

b8032edc04
175872b358
ae337a1dd7
bdbc6a4ba4
978652b292
7771c0e96b
026dc0aed2
5ef6b054fd
3cca2b7ecd
3d8bd28f5a
6d00be2093
f8bdb14335
c88cf6a259
906ce6dc0d
49163e92a0
f67e45f432
91335051ac
2dc9d275b1
b6f0ecba18
dd0253ff1f
5936af37e7
341111f1f9
1fc242200f
5c94c61b0a
43426a4a26
c57622e226
5fca7c25bc
e901c38df0
f99e19f8a9
f4b1acba89
88e821ed8e
58824066e0
f71f98777c
25e46574ab
52081ba15e
342a2d4dac
e474a9d95d
3a3889546d
.github/actions/finalize/action.yml (vendored, 2 changed lines)

@@ -13,7 +13,7 @@ inputs:
 runs:
   using: 'composite'
   steps:
-    - uses: balena-io/deploy-to-balena-action@72b7652cd8b4b0b49376f60fe790eef9ba76e3f0 # v2.0.74
+    - uses: balena-io/deploy-to-balena-action@3cb4217ab3347a885b4fcdc44d5f3a4153145633 # v2.0.92
       with:
         balena_token: ${{ fromJSON(inputs.secrets).BALENA_STAGING_TOKEN }}
         fleet: ${{ env.matrix_value }}
.github/actions/publish/action.yml (vendored, 2 changed lines)

@@ -13,7 +13,7 @@ inputs:
 runs:
   using: "composite"
   steps:
-    - uses: balena-io/deploy-to-balena-action@72b7652cd8b4b0b49376f60fe790eef9ba76e3f0 # v2.0.74
+    - uses: balena-io/deploy-to-balena-action@3cb4217ab3347a885b4fcdc44d5f3a4153145633 # v2.0.92
       with:
         balena_token: ${{ fromJSON(inputs.secrets).BALENA_STAGING_TOKEN }}
         fleet: ${{ env.matrix_value }}
@@ -1,3 +1,230 @@
+- commits:
+    - subject: Remove GOT retries on state poll
+      hash: ae337a1dd7743b0ee0a05c32a5ce01965c5bafef
+      body: |
+        The state poll already has retry implementation, making the GOT default
+        unnecessary.
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+    - subject: Ensure poll socket timeout is defined early
+      hash: bdbc6a4ba4766f9466891497bc02bd33aff1d4c7
+      body: |
+        We have observed that even when setting the socket timeout on the
+        state poll https request, the timeout is only applied once the socket is
+        connected. This causes issues with Node's auto family selection (happy
+        eyeballs), as the default https timeout is 5s which means that larger
+        [auto select attempt timeout](https://nodejs.org/docs/latest-v22.x/api/net.html#netgetdefaultautoselectfamilyattempttimeout) may result in the socket timing out before all connection attempts have been tried.
+
+        This commit sets a different https Agent for state polling, with a
+        timeout matching the `apiRequestTimeout` used for other request events.
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+  version: 16.12.8
+  title: ""
+  date: 2025-03-12T14:50:33.204Z
+- commits:
+    - subject: Release locks when removing apps
+      hash: 026dc0aed29ce7d66cfdd8616d80d1f5daf3ad46
+      body: |
+        This prevents leftover locks that can prevent other operations from
+        taking place.
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+  version: 16.12.7
+  title: ""
+  date: 2025-03-06T19:11:18.704Z
+- commits:
+    - subject: Log non-API errors during state poll
+      hash: 6d00be20930398699da1006176dac1e81b2dbbd6
+      body: >
+        The supervisor was failing silently if an error happened while
+        establishing the
+
+        connection (e.g. requesting the socket).
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+    - subject: Fix target poll healthcheck
+      hash: f8bdb1433508dcaeff12a78d746256041ba1c414
+      body: |
+        The Target.lastFetch time compared when performing the healthcheck
+        resets any time a poll is attempted no matter the outcome. This changes
+        the behavior so the time is reset only on a successful poll
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+  version: 16.12.6
+  title: ""
+  date: 2025-03-04T14:25:06.565Z
+- commits:
+    - subject: Decrease balenaCloud api request timeout from 15m to 59s
+      hash: 49163e92a013250f72ca7231e11945b465c4dd45
+      body: |
+        This was mistakenly increased due to confusion between the timeout for
+        requests to the supervisor's api vs the timeout for requests from the
+        supervisor to the balenaCloud api. This separates the two configs and
+        documents the difference between the timeouts whilst also decreasing
+        the timeout for balenaCloud api requests to the correct/expected value
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Pagan Gazzard
+      nested: []
+  version: 16.12.5
+  title: ""
+  date: 2025-03-04T13:35:26.801Z
+- commits:
+    - subject: Don't revert to regular pull if delta server 401
+      hash: 2dc9d275b15a0802264bcd49e2f0dddbbadd2225
+      body: |
+        If the Supervisor receives a 401 Unauthorized from the delta server
+        when requesting a delta image location, we should surface the error
+        instead of falling back to a regular pull immediately, as there could
+        be an issue with the delta auth token, which refreshes after
+        DELTA_TOKEN_TIMEOUT (10min), or some other edge case.
+      footer:
+        Change-type: patch
+        change-type: patch
+        Signed-off-by: Christina Ying Wang <christina@balena.io>
+        signed-off-by: Christina Ying Wang <christina@balena.io>
+      author: Christina Ying Wang
+      nested: []
+  version: 16.12.4
+  title: ""
+  date: 2025-03-03T13:42:18.045Z
+- commits:
+    - subject: Retry DELTA_APPLY_RETRY_COUNT (3) times during delta apply fail before
+        reverting to regular pull
+      hash: 341111f1f94cd9f17fd7be9b6f21e3bc22c9ad3a
+      body: >
+        This prevents an image download error loop where the delta image on the
+        delta server is present,
+
+        but some aspect of the delta image or the base image on the device does
+        not match up, causing
+
+        the delta to fail to be applied to the base image.
+
+
+        Delta apply errors don't raise status codes as they are thrown from the
+        Engine (although they should),
+
+        so if an error with a status code is raised during this time, throw an
+        error to the handler
+
+        indicating that the delta should be retried until success. Errors with
+        status codes raised during
+
+        this time are largely network related, so falling back to a regular pull
+        won't improve anything.
+
+
+        Upon delta apply errors exceeding DELTA_APPLY_RETRY_COUNT, revert to a
+        regular pull.
+      footer:
+        Change-type: patch
+        change-type: patch
+        Signed-off-by: Christina Ying Wang <christina@balena.io>
+        signed-off-by: Christina Ying Wang <christina@balena.io>
+      author: Christina Ying Wang
+      nested: []
+    - subject: Revert to regular pull immediately on delta server failure (code 400s)
+      hash: 1fc242200f78e4219aafc5bb91de8cf0916236af
+      body: >
+        If the delta server responds immediately with HTTP 4xx upon requesting a
+        delta image,
+
+        this means the server is not able to supply the resource, so fall back
+        to a regular pull
+
+        immediately.
+      footer:
+        Change-type: patch
+        change-type: patch
+        Signed-off-by: Christina Ying Wang <christina@balena.io>
+        signed-off-by: Christina Ying Wang <christina@balena.io>
+      author: Christina Ying Wang
+      nested: []
+  version: 16.12.3
+  title: ""
+  date: 2025-02-19T20:51:53.085Z
+- commits:
+    - subject: Update balena-io/deploy-to-balena-action action to v2.0.92
+      hash: c57622e2264e41078e907d6ba8de9d5206bb6293
+      body: |
+        Update balena-io/deploy-to-balena-action from 2.0.74 to 2.0.92
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: balena-renovate[bot]
+      nested: []
+  version: 16.12.2
+  title: ""
+  date: 2025-02-11T01:04:22.736Z
+- commits:
+    - subject: Pin io-ts version to v2.2.20
+      hash: 88e821ed8e36e10d6429dc31950b5aeed968aa3f
+      body: |
+        gcanti/io-ts#705 fixes an issue with io-ts and non-enumerable
+        properties, but that results in objects with invalid properties to get
+        removed during `decode`, which breaks our validation tests.
+
+        Need to figure out what is the right behavior for us
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+    - subject: Update network-manager to v1
+      hash: f71f98777cbf7198745f1dcb8467b8cc62719d85
+      body: ""
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+    - subject: Update balena-request and balena-register-device
+      hash: 52081ba15e84be794a906d5cbccc343b24748bba
+      body: ""
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+    - subject: Update pinejs-client-request to v8
+      hash: 342a2d4dac737274ab13a8b05eac0f1f036a5075
+      body: ""
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+    - subject: Update chai utility modules
+      hash: 3a3889546d8546793914bc2b5da10e202ebb14b1
+      body: |
+        Updating chai will be done in a future PR as it requires overhauling all
+        tests since chai is now ESM
+      footer:
+        Change-type: patch
+        change-type: patch
+      author: Felipe Lalanne
+      nested: []
+  version: 16.12.1
+  title: ""
+  date: 2025-02-10T22:51:51.632Z
 - commits:
     - subject: Update contrato to v0.12.0
       hash: 85fc5784bcd187d086bffbd0c2167ce9eb34650f
CHANGELOG.md (47 changed lines)

@@ -4,6 +4,53 @@ All notable changes to this project will be documented in this file
 automatically by Versionist. DO NOT EDIT THIS FILE MANUALLY!
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+# v16.12.8
+## (2025-03-12)
+
+* Remove GOT retries on state poll [Felipe Lalanne]
+* Ensure poll socket timeout is defined early [Felipe Lalanne]
+
+# v16.12.7
+## (2025-03-06)
+
+* Release locks when removing apps [Felipe Lalanne]
+
+# v16.12.6
+## (2025-03-04)
+
+* Log non-API errors during state poll [Felipe Lalanne]
+* Fix target poll healthcheck [Felipe Lalanne]
+
+# v16.12.5
+## (2025-03-04)
+
+* Decrease balenaCloud api request timeout from 15m to 59s [Pagan Gazzard]
+
+# v16.12.4
+## (2025-03-03)
+
+* Don't revert to regular pull if delta server 401 [Christina Ying Wang]
+
+# v16.12.3
+## (2025-02-19)
+
+* Retry DELTA_APPLY_RETRY_COUNT (3) times during delta apply fail before reverting to regular pull [Christina Ying Wang]
+* Revert to regular pull immediately on delta server failure (code 400s) [Christina Ying Wang]
+
+# v16.12.2
+## (2025-02-11)
+
+* Update balena-io/deploy-to-balena-action action to v2.0.92 [balena-renovate[bot]]
+
+# v16.12.1
+## (2025-02-10)
+
+* Pin io-ts version to v2.2.20 [Felipe Lalanne]
+* Update network-manager to v1 [Felipe Lalanne]
+* Update balena-request and balena-register-device [Felipe Lalanne]
+* Update pinejs-client-request to v8 [Felipe Lalanne]
+* Update chai utility modules [Felipe Lalanne]
+
 # v16.12.0
 ## (2025-01-20)
 
@@ -2,6 +2,6 @@ name: balena-supervisor
 description: 'Balena Supervisor: balena''s agent on devices.'
 joinable: false
 type: sw.application
-version: 16.12.0
+version: 16.12.8
 provides:
   - slug: sw.compose.long-volume-syntax
package-lock.json (generated, 2052 changed lines): file diff suppressed because it is too large.
package.json (70 changed lines)

@@ -1,7 +1,7 @@
 {
 	"name": "balena-supervisor",
 	"description": "This is balena's Supervisor, a program that runs on IoT devices and has the task of running user Apps (which are Docker containers), and updating them as the balena API informs it to.",
-	"version": "16.12.0",
+	"version": "16.12.8",
 	"license": "Apache-2.0",
 	"repository": {
 		"type": "git",
@@ -39,13 +39,14 @@
 		"npm": ">=10"
 	},
 	"devDependencies": {
-		"@balena/compose": "^3.2.1",
+		"@balena/compose": "^6.0.0",
+		"@balena/contrato": "^0.12.0",
 		"@balena/es-version": "^1.0.3",
 		"@balena/lint": "^8.0.2",
 		"@balena/sbvr-types": "^9.1.0",
 		"@types/bluebird": "^3.5.42",
-		"@types/chai": "^4.3.14",
-		"@types/chai-as-promised": "^7.1.8",
+		"@types/chai": "^4.3.20",
+		"@types/chai-as-promised": "^8.0.1",
 		"@types/chai-like": "^1.1.3",
 		"@types/chai-things": "0.0.38",
 		"@types/common-tags": "^1.8.4",
@@ -70,71 +71,72 @@
 		"@types/webpack": "^5.28.5",
 		"@types/yargs": "^17.0.32",
 		"balena-auth": "^6.0.1",
-		"balena-register-device": "^9.0.2",
-		"balena-request": "^13.3.1",
+		"balena-register-device": "^9.0.4",
+		"balena-request": "^14.0.1",
 		"blinking": "^1.0.1",
 		"bluebird": "^3.7.2",
-		"chai": "^4.3.4",
-		"chai-as-promised": "^7.1.1",
-		"chai-like": "^1.1.1",
+		"chai": "^4.5.0",
+		"chai-as-promised": "^8.0.1",
+		"chai-like": "^1.1.3",
 		"chai-things": "^0.2.0",
-		"chokidar": "^3.5.1",
+		"chokidar": "^4.0.3",
 		"common-tags": "^1.8.0",
 		"copy-webpack-plugin": "^12.0.0",
-		"deep-object-diff": "^1.1.0",
+		"deep-object-diff": "1.1.0",
 		"docker-delta": "^4.1.0",
-		"docker-progress": "^5.2.3",
+		"docker-progress": "^5.2.4",
 		"dockerode": "^4.0.2",
 		"duration-js": "^4.0.0",
-		"express": "^4.17.1",
+		"express": "^4.21.2",
 		"fork-ts-checker-webpack-plugin": "^9.0.2",
 		"fp-ts": "^2.16.5",
-		"got": "14.4.1",
-		"husky": "^9.0.11",
-		"io-ts": "^2.2.20",
+		"got": "^14.4.6",
+		"husky": "^9.1.7",
+		"io-ts": "2.2.20",
 		"io-ts-reporters": "^2.0.1",
 		"json-mask": "^2.0.0",
 		"JSONStream": "^1.3.5",
 		"knex": "^3.1.0",
-		"lint-staged": "^15.2.2",
+		"lint-staged": "^15.4.3",
 		"livepush": "^3.5.1",
 		"lodash": "^4.17.21",
 		"mdns-resolver": "1.1.0",
 		"memoizee": "^0.4.14",
 		"mocha": "^10.4.0",
-		"mocha-pod": "^2.0.5",
+		"mocha-pod": "^2.0.10",
 		"morgan": "^1.10.0",
-		"network-checker": "^0.1.1",
-		"nock": "^13.1.2",
-		"node-loader": "^2.0.0",
-		"nodemon": "^3.1.0",
-		"pinejs-client-request": "^7.3.5",
+		"network-checker": "^1.0.2",
+		"nock": "^13.5.6",
+		"node-loader": "^2.1.0",
+		"nodemon": "^3.1.9",
+		"pinejs-client-core": "^7.2.0",
+		"pinejs-client-request": "^8.0.1",
 		"pretty-ms": "^7.0.1",
 		"request": "^2.88.2",
 		"resumable-request": "^2.0.1",
 		"rewire": "^7.0.0",
-		"rimraf": "^5.0.0",
+		"rimraf": "^5.0.10",
 		"rwlock": "^5.0.0",
 		"semver": "7.6.3",
-		"shell-quote": "^1.7.2",
+		"shell-quote": "^1.8.2",
 		"sinon": "^18.0.0",
 		"sinon-chai": "^3.7.0",
 		"strict-event-emitter-types": "^2.0.0",
 		"supertest": "^7.0.0",
-		"systeminformation": "^5.22.7",
+		"systeminformation": "^5.25.11",
 		"tar-stream": "^3.1.7",
-		"terser-webpack-plugin": "^5.3.6",
-		"ts-loader": "^9.4.0",
+		"terser-webpack-plugin": "^5.3.11",
+		"ts-loader": "^9.5.2",
 		"ts-node": "^10.0.0",
-		"tsconfig-paths": "^4.1.0",
+		"tsconfig-paths": "^4.2.0",
 		"typed-error": "^3.2.1",
-		"typescript": "^5.5.4",
-		"webpack": "^5.74.0",
-		"webpack-cli": "^5.0.0",
-		"winston": "^3.3.3",
+		"typescript": "^5.7.3",
+		"webpack": "^5.97.1",
+		"webpack-cli": "^5.1.4",
+		"winston": "^3.17.0",
 		"yargs": "^17.7.2"
 	},
 	"versionist": {
-		"publishedAt": "2025-01-20T22:14:36.208Z"
+		"publishedAt": "2025-03-12T14:50:33.763Z"
 	}
 }
@@ -63,7 +63,7 @@ export async function healthcheck() {
 	}
 
 	// Check last time target state has been polled
-	const timeSinceLastFetch = process.hrtime(TargetState.lastFetch);
+	const timeSinceLastFetch = process.hrtime(TargetState.lastSuccessfulFetch);
 	const timeSinceLastFetchMs =
 		timeSinceLastFetch[0] * 1000 + timeSinceLastFetch[1] / 1e6;
 
@@ -3,6 +3,7 @@ import url from 'url';
 import { setTimeout } from 'timers/promises';
 import Bluebird from 'bluebird';
 import type StrictEventEmitter from 'strict-event-emitter-types';
+import { Agent } from 'https';
 
 import type { TargetState } from '../types/state';
 import { InternalInconsistencyError } from '../lib/errors';
@@ -87,7 +88,8 @@ const emitTargetState = (
  * We set a value rather then being undeclared because having it undefined
  * adds more overhead to dealing with this value without any benefits.
  */
-export let lastFetch: ReturnType<typeof process.hrtime> = process.hrtime();
+export let lastSuccessfulFetch: ReturnType<typeof process.hrtime> =
+	process.hrtime();
 
 /**
  * Attempts to update the target state
@@ -101,11 +103,11 @@ export const update = async (
 ): Promise<void> => {
 	await config.initialized();
 	return Bluebird.using(lockGetTarget(), async () => {
-		const { uuid, apiEndpoint, apiTimeout, deviceApiKey } =
+		const { uuid, apiEndpoint, apiRequestTimeout, deviceApiKey } =
 			await config.getMany([
 				'uuid',
 				'apiEndpoint',
-				'apiTimeout',
+				'apiRequestTimeout',
 				'deviceApiKey',
 			]);
 
@@ -119,6 +121,13 @@ export const update = async (
 		const got = await getGotInstance();
 
 		const { statusCode, headers, body } = await got(endpoint, {
+			retry: { limit: 0 },
+			agent: {
+				https: new Agent({
+					keepAlive: true,
+					timeout: apiRequestTimeout,
+				}),
+			},
 			headers: {
 				Authorization: `Bearer ${deviceApiKey}`,
 				'If-None-Match': cache?.etag,
@@ -126,12 +135,12 @@ export const update = async (
 			},
 			timeout: {
 				// TODO: We use the same default timeout for all of these in order to have a timeout generally
 				// but it would probably make sense to tune them individually
-				lookup: apiTimeout,
-				connect: apiTimeout,
-				secureConnect: apiTimeout,
-				socket: apiTimeout,
-				send: apiTimeout,
-				response: apiTimeout,
+				lookup: apiRequestTimeout,
+				connect: apiRequestTimeout,
+				secureConnect: apiRequestTimeout,
+				socket: apiRequestTimeout,
+				send: apiRequestTimeout,
+				response: apiRequestTimeout,
 			},
 		});
@@ -154,8 +163,6 @@ export const update = async (
 
 		// Emit the target state and update the cache
 		cache.emitted = emitTargetState(cache, force, isFromApi);
-	}).finally(() => {
-		lastFetch = process.hrtime();
 	});
 };
 
@@ -188,7 +195,11 @@ const poll = async (
 		await update();
 		// Reset fetchErrors because we successfuly updated
 		fetchErrors = 0;
-	} catch {
+		lastSuccessfulFetch = process.hrtime();
+	} catch (e) {
+		if (!(e instanceof ApiResponseError)) {
+			log.error('Target state poll failed', e);
+		}
 		// Exponential back off if request fails
 		pollInterval = Math.min(appUpdatePollInterval, 15000 * 2 ** fetchErrors);
 		++fetchErrors;
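For context on the poll change above, here is a minimal standalone sketch (not the Supervisor's exact code; the endpoint URL and the 59s value are illustrative assumptions) of the technique: a dedicated https Agent whose timeout is set at socket creation, so the connection phase, including Node's happy-eyeballs address attempts, is bounded by `apiRequestTimeout` rather than the 5s https default mentioned in the commit message.

// Sketch only: illustrative endpoint and timeout, assuming got v14 (ESM).
import { Agent } from 'https';
import got from 'got';

const apiRequestTimeout = 59000; // ms, matching the new default

const response = await got('https://api.balena-cloud.com/device/v3/state', {
	// The poll loop implements its own exponential backoff, so got's
	// default retries are disabled (see "Remove GOT retries on state poll").
	retry: { limit: 0 },
	agent: {
		// The Agent timeout applies from socket creation, before the
		// connection completes, so it also bounds the connection phase.
		https: new Agent({ keepAlive: true, timeout: apiRequestTimeout }),
	},
	timeout: {
		connect: apiRequestTimeout,
		socket: apiRequestTimeout,
		response: apiRequestTimeout,
	},
});
console.log(response.statusCode);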
@@ -41,14 +41,17 @@ export let stateReportErrors = 0;
 type StateReportOpts = {
 	[key in keyof Pick<
 		config.ConfigMap<SchemaTypeKey>,
-		'apiEndpoint' | 'apiTimeout' | 'deviceApiKey' | 'appUpdatePollInterval'
+		| 'apiEndpoint'
+		| 'apiRequestTimeout'
+		| 'deviceApiKey'
+		| 'appUpdatePollInterval'
 	>]: SchemaReturn<key>;
 };
 
 type StateReport = { body: Partial<DeviceState>; opts: StateReportOpts };
 
 async function report({ body, opts }: StateReport) {
-	const { apiEndpoint, apiTimeout, deviceApiKey } = opts;
+	const { apiEndpoint, apiRequestTimeout, deviceApiKey } = opts;
 
 	if (!apiEndpoint) {
 		throw new InternalInconsistencyError(
@@ -69,7 +72,7 @@ async function report({ body, opts }: StateReport) {
 
 	const [{ statusCode, body: statusMessage, headers }] = await request
 		.patchAsync(endpoint, params)
-		.timeout(apiTimeout);
+		.timeout(apiRequestTimeout);
 
 	if (statusCode < 200 || statusCode >= 300) {
 		throw new StatusError(
@@ -203,7 +206,7 @@ export async function startReporting() {
 	// Get configs needed to make a report
 	const reportConfigs = (await config.getMany([
 		'apiEndpoint',
-		'apiTimeout',
+		'apiRequestTimeout',
 		'deviceApiKey',
 		'appUpdatePollInterval',
 	])) as StateReportOpts;
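As an aside, `StateReportOpts` above derives its option types with a mapped type over `Pick`. A simplified, self-contained illustration of the pattern (the `ConfigSchema` type here is a hypothetical stand-in for the Supervisor's `config.ConfigMap`, not its real shape):

// Hypothetical schema stand-in, for illustration only.
type ConfigSchema = {
	apiEndpoint: string;
	apiRequestTimeout: number;
	deviceApiKey: string;
	appUpdatePollInterval: number;
	unrelatedKey: boolean;
};

// Each picked key keeps its schema type, so report() receives exactly the
// options it needs, with precise types and nothing else.
type StateReportOpts = {
	[key in keyof Pick<
		ConfigSchema,
		'apiEndpoint' | 'apiRequestTimeout' | 'deviceApiKey' | 'appUpdatePollInterval'
	>]: ConfigSchema[key];
};

const opts: StateReportOpts = {
	apiEndpoint: 'https://api.balena-cloud.com',
	apiRequestTimeout: 59000,
	deviceApiKey: 'example-key',
	appUpdatePollInterval: 900000,
};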
@@ -247,6 +247,16 @@ class AppImpl implements App {
 			}
 		}
 
+		// Release locks (if any) for all services before settling state
+		if (state.lock || state.hasLeftoverLocks) {
+			return [
+				generateStep('releaseLock', {
+					appId: this.appId,
+					lock: state.lock,
+				}),
+			];
+		}
+
 		return [];
 	}
 
@@ -90,7 +90,7 @@ export const fnSchema = {
 			'deviceArch',
 			'deviceType',
 			'apiEndpoint',
-			'apiTimeout',
+			'apiRequestTimeout',
 			'registered_at',
 			'deviceId',
 			'version',
@@ -107,7 +107,7 @@
 				provisioningApiKey: conf.apiKey,
 				deviceApiKey: conf.deviceApiKey,
 				apiEndpoint: conf.apiEndpoint,
-				apiTimeout: conf.apiTimeout,
+				apiRequestTimeout: conf.apiRequestTimeout,
 				registered_at: conf.registered_at,
 				deviceId: conf.deviceId,
 				supervisorVersion: conf.version,
@@ -12,6 +12,9 @@ export const schemaTypes = {
 		type: t.string,
 		default: '',
 	},
+	/**
+	 * The timeout for the supervisor's api
+	 */
 	apiTimeout: {
 		type: PermissiveNumber,
 		default: 15 * 60 * 1000,
@@ -118,6 +121,13 @@ export const schemaTypes = {
 		type: PermissiveBoolean,
 		default: false,
 	},
+	/**
+	 * The timeout for requests to the balenaCloud api
+	 */
+	apiRequestTimeout: {
+		type: PermissiveNumber,
+		default: 59000,
+	},
 	deltaRequestTimeout: {
 		type: PermissiveNumber,
 		default: 59000,
@@ -218,7 +228,7 @@ export const schemaTypes = {
 	provisioningApiKey: t.union([t.string, NullOrUndefined]),
 	deviceApiKey: t.string,
 	apiEndpoint: t.string,
-	apiTimeout: PermissiveNumber,
+	apiRequestTimeout: PermissiveNumber,
 	registered_at: t.union([PermissiveNumber, NullOrUndefined]),
 	deviceId: t.union([PermissiveNumber, NullOrUndefined]),
 	supervisorVersion: t.union([t.string, t.undefined]),
@@ -4,6 +4,9 @@ export const schema = {
 		mutable: false,
 		removeIfNull: false,
 	},
+	/**
+	 * The timeout for the supervisor's api
+	 */
 	apiTimeout: {
 		source: 'config.json',
 		mutable: false,
@@ -120,6 +123,11 @@ export const schema = {
 		mutable: true,
 		removeIfNull: false,
 	},
+	apiRequestTimeout: {
+		source: 'db',
+		mutable: true,
+		removeIfNull: false,
+	},
 	delta: {
 		source: 'db',
 		mutable: true,
@@ -141,6 +141,11 @@ const configKeys: Dictionary<ConfigOption> = {
 		varType: 'bool',
 		defaultValue: 'true',
 	},
+	apiRequestTimeout: {
+		envVarName: 'SUPERVISOR_API_REQUEST_TIMEOUT',
+		varType: 'int',
+		defaultValue: '59000',
+	},
 	delta: {
 		envVarName: 'SUPERVISOR_DELTA',
 		varType: 'bool',
@@ -111,10 +111,10 @@ export const exchangeKeyAndGetDevice = async (
 	opts: Partial<KeyExchangeOpts>,
 ): Promise<Device> => {
 	const uuid = opts.uuid;
-	const apiTimeout = opts.apiTimeout;
-	if (!(uuid && apiTimeout)) {
+	const apiRequestTimeout = opts.apiRequestTimeout;
+	if (!(uuid && apiRequestTimeout)) {
 		throw new InternalInconsistencyError(
-			'UUID and apiTimeout should be defined in exchangeKeyAndGetDevice',
+			'UUID and apiRequestTimeout should be defined in exchangeKeyAndGetDevice',
 		);
 	}
 
@@ -122,7 +122,12 @@ export const exchangeKeyAndGetDevice = async (
 	// valid, because if it is then we can just use that
 	if (opts.deviceApiKey != null) {
 		try {
-			return await fetchDevice(balenaApi, uuid, opts.deviceApiKey, apiTimeout);
+			return await fetchDevice(
+				balenaApi,
+				uuid,
+				opts.deviceApiKey,
+				apiRequestTimeout,
+			);
 		} catch (e) {
 			if (e instanceof DeviceNotFoundError) {
 				// do nothing...
@@ -146,7 +151,7 @@ export const exchangeKeyAndGetDevice = async (
 			balenaApi,
 			uuid,
 			opts.provisioningApiKey,
-			apiTimeout,
+			apiRequestTimeout,
 		);
 	} catch {
 		throw new ExchangeKeyError(`Couldn't fetch device with provisioning key`);
@@ -165,7 +170,7 @@ export const exchangeKeyAndGetDevice = async (
 				Authorization: `Bearer ${opts.provisioningApiKey}`,
 			},
 		})
-		.timeout(apiTimeout);
+		.timeout(apiRequestTimeout);
 
 	if (res.statusCode !== 200) {
 		throw new ExchangeKeyError(
@@ -220,7 +225,7 @@ export const provision = async (
 				osVariant: opts.osVariant,
 				macAddress: opts.macAddress,
 			}),
-		).timeout(opts.apiTimeout);
+		).timeout(opts.apiRequestTimeout);
 	} catch (err) {
 		if (
 			err instanceof deviceRegister.ApiError &&
@@ -1,22 +1,23 @@
-import type { ProgressCallback } from 'docker-progress';
 import { DockerProgress } from 'docker-progress';
+import type { ProgressCallback } from 'docker-progress';
 import Dockerode from 'dockerode';
 import _ from 'lodash';
 import memoizee from 'memoizee';
 
 import { applyDelta, OutOfSyncError } from 'docker-delta';
 
-import type { SchemaReturn } from '../config/schema-type';
-import log from './supervisor-console';
 import { envArrayToObject } from './conversions';
-import * as request from './request';
 import {
 	DeltaStillProcessingError,
 	ImageAuthenticationError,
 	InvalidNetGatewayError,
+	DeltaServerError,
+	DeltaApplyError,
+	isStatusError,
 } from './errors';
+import * as request from './request';
 import type { EnvVarObject } from '../types';
 
+import log from './supervisor-console';
+import type { SchemaReturn } from '../config/schema-type';
+
 export type FetchOptions = SchemaReturn<'fetchOptions'>;
 export type DeltaFetchOptions = FetchOptions & {
@@ -41,6 +42,18 @@ type ImageNameParts = {
 // (10 mins)
 const DELTA_TOKEN_TIMEOUT = 10 * 60 * 1000;
 
+// How many times to retry a v3 delta apply before falling back to a regular pull.
+// A delta is applied to the base image when pulling, so a failure could be due to
+// "layers from manifest don't match image configuration", which can occur before
+// or after downloading delta image layers.
+//
+// Other causes of failure have not been documented as clearly as "layers from manifest"
+// but could manifest as well, though unclear if they occur before, after, or during
+// downloading delta image layers.
+//
+// See: https://github.com/balena-os/balena-engine/blob/master/distribution/pull_v2.go#L43
+const DELTA_APPLY_RETRY_COUNT = 3;
+
 export const docker = new Dockerode();
 export const dockerProgress = new DockerProgress({
 	docker,
@@ -113,11 +126,7 @@ export async function fetchDeltaWithProgress(
 	onProgress: ProgressCallback,
 	serviceName: string,
 ): Promise<string> {
-	const deltaSourceId =
-		deltaOpts.deltaSourceId != null
-			? deltaOpts.deltaSourceId
-			: deltaOpts.deltaSource;
-
+	const deltaSourceId = deltaOpts.deltaSourceId ?? deltaOpts.deltaSource;
 	const timeout = deltaOpts.deltaApplyTimeout;
 
 	const logFn = (str: string) =>
@@ -143,7 +152,7 @@ export async function fetchDeltaWithProgress(
 	}
 
 	// Since the supevisor never calls this function with a source anymore,
-	// this should never happen, but w ehandle it anyway
+	// this should never happen, but we handle it anyway
 	if (deltaOpts.deltaSource == null) {
 		logFn('Falling back to regular pull due to lack of a delta source');
 		return fetchImageWithProgress(imgDest, deltaOpts, onProgress);
@@ -210,6 +219,18 @@ export async function fetchDeltaWithProgress(
 				}
 				break;
 			case 3:
+				// If 400s status code, throw a more specific error & revert immediately to a regular pull,
+				// unless the code is 401 Unauthorized, in which case we should surface the error by retrying
+				// the delta server request, instead of falling back to a regular pull immediately.
+				if (res.statusCode >= 400 && res.statusCode < 500) {
+					if (res.statusCode === 401) {
+						throw new Error(
+							`Got ${res.statusCode} when requesting an image from delta server: ${res.statusMessage}`,
+						);
+					} else {
+						throw new DeltaServerError(res.statusCode, res.statusMessage);
+					}
+				}
 				if (res.statusCode !== 200) {
 					throw new Error(
 						`Got ${res.statusCode} when requesting v3 delta from delta server.`,
@@ -225,24 +246,62 @@ export async function fetchDeltaWithProgress(
 						`Got an error when parsing delta server response for v3 delta: ${e}`,
 					);
 				}
-				id = await applyBalenaDelta(name, token, onProgress, logFn);
+				// Try to apply delta DELTA_APPLY_RETRY_COUNT times, then throw DeltaApplyError
+				let lastError: Error | undefined = undefined;
+				for (
+					let tryCount = 0;
+					tryCount < DELTA_APPLY_RETRY_COUNT;
+					tryCount++
+				) {
+					try {
+						id = await applyBalenaDelta(name, token, onProgress, logFn);
+						break;
+					} catch (e) {
+						if (isStatusError(e)) {
+							// A status error during delta pull indicates network issues,
+							// so we should throw an error to the handler that indicates that
+							// the delta pull should be retried until network issues are resolved,
+							// rather than falling back to a regular pull.
+							throw e;
+						}
+						lastError = e as Error;
+						logFn(
+							`Delta apply failed, retrying (${tryCount + 1}/${DELTA_APPLY_RETRY_COUNT})...`,
+						);
+					}
+				}
+				if (lastError) {
+					throw new DeltaApplyError(lastError.message);
+				}
 				break;
 			default:
 				throw new Error(`Unsupported delta version: ${deltaOpts.deltaVersion}`);
 		}
 	} catch (e) {
+		// Log appropriate message based on error type
 		if (e instanceof OutOfSyncError) {
 			logFn('Falling back to regular pull due to delta out of sync error');
 			return await fetchImageWithProgress(imgDest, deltaOpts, onProgress);
+		} else if (e instanceof DeltaServerError) {
+			logFn(
+				`Falling back to regular pull due to delta server error (${e.statusCode})${e.statusMessage ? `: ${e.statusMessage}` : ''}`,
+			);
+		} else if (e instanceof DeltaApplyError) {
+			// A delta apply error is raised from the Engine and doesn't have a status code
+			logFn(
+				`Falling back to regular pull due to delta apply error ${e.message ? `: ${e.message}` : ''}`,
+			);
 		} else {
 			logFn(`Delta failed with ${e}`);
 			throw e;
 		}
+
+		// For handled errors, fall back to regular pull
+		return fetchImageWithProgress(imgDest, deltaOpts, onProgress);
 	}
 
 	logFn(`Delta applied successfully`);
-	return id;
+	return id!;
 }
 
 export async function fetchImageWithProgress(
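The retry loop added above amounts to a small generic policy: retry the apply up to a limit, but rethrow status errors immediately so the caller retries the whole delta instead of falling back to a regular pull. A hedged standalone sketch of that policy (a hypothetical helper, not a Supervisor export):

// Hypothetical helper illustrating the retry policy; the isStatusError
// predicate is passed in rather than imported from the Supervisor.
async function applyWithRetry<T>(
	apply: () => Promise<T>,
	limit: number,
	isStatusError: (e: unknown) => boolean,
): Promise<T> {
	let lastError: Error | undefined;
	for (let tryCount = 0; tryCount < limit; tryCount++) {
		try {
			return await apply();
		} catch (e) {
			if (isStatusError(e)) {
				// Status codes here are mostly network-related; a regular
				// pull would not help, so surface the error for a retry.
				throw e;
			}
			lastError = e as Error;
		}
	}
	throw new Error(`Delta apply failed after ${limit} attempts: ${lastError?.message}`);
}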
@@ -70,6 +70,13 @@ export class InvalidNetGatewayError extends TypedError {}
 
 export class DeltaStillProcessingError extends TypedError {}
 
+export class DeltaServerError extends StatusError {}
+export class DeltaApplyError extends Error {
+	constructor(message?: string) {
+		super(message);
+	}
+}
+
 export class UpdatesLockedError extends TypedError {}
 
 export function isHttpConflictError(err: { statusCode: number }): boolean {
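A brief sketch of how the new error classes partition the fallback decision. The `(statusCode, statusMessage)` constructor on the `StatusError` base is inferred from the calls in the delta code above, so treat its exact shape as an assumption:

// Illustrative only: inferred shape of StatusError and how the delta error
// subclasses can drive the fallback decision.
class StatusError extends Error {
	constructor(
		public statusCode: number,
		public statusMessage?: string,
	) {
		super(`${statusCode}${statusMessage ? `: ${statusMessage}` : ''}`);
	}
}

class DeltaServerError extends StatusError {}
class DeltaApplyError extends Error {}

// Handled delta errors fall back to a regular pull; anything else propagates
// so the whole delta request is retried.
function shouldFallBackToRegularPull(e: unknown): boolean {
	return e instanceof DeltaServerError || e instanceof DeltaApplyError;
}

console.log(shouldFallBackToRegularPull(new DeltaServerError(404, 'Not Found'))); // true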
@@ -1,6 +1,7 @@
 import _ from 'lodash';
 import { promises as fs, watch } from 'fs';
-import networkCheck from 'network-checker';
+import { checkHost as checkNetHost, monitor } from 'network-checker';
+import type { ConnectOptions, MonitorChangeFunction } from 'network-checker';
 import os from 'os';
 import url from 'url';
 
@@ -20,21 +21,16 @@ const networkPattern = {
 let isConnectivityCheckPaused = false;
 let isConnectivityCheckEnabled = true;
 
-function checkHost(
-	opts: networkCheck.ConnectOptions,
-): boolean | PromiseLike<boolean> {
+async function checkHost(opts: ConnectOptions): Promise<boolean> {
 	return (
 		!isConnectivityCheckEnabled ||
 		isConnectivityCheckPaused ||
-		networkCheck.checkHost(opts)
+		(await checkNetHost(opts))
 	);
 }
 
-function customMonitor(
-	options: networkCheck.ConnectOptions,
-	fn: networkCheck.MonitorChangeFunction,
-) {
-	return networkCheck.monitor(checkHost, options, fn);
+function customMonitor(options: ConnectOptions, fn: MonitorChangeFunction) {
+	return monitor(checkHost, options, fn);
 }
 
 export function enableCheck(enable: boolean) {
@@ -60,7 +56,7 @@ export const startConnectivityCheck = _.once(
 	async (
 		apiEndpoint: string,
 		enable: boolean,
-		onChangeCallback?: networkCheck.MonitorChangeFunction,
+		onChangeCallback?: MonitorChangeFunction,
 	) => {
 		enableConnectivityCheck(enable);
 		if (!apiEndpoint) {
@@ -84,6 +84,7 @@ describe('device-config', () => {
 			SUPERVISOR_LOCAL_MODE: 'false',
 			SUPERVISOR_CONNECTIVITY_CHECK: 'true',
 			SUPERVISOR_LOG_CONTROL: 'true',
+			SUPERVISOR_API_REQUEST_TIMEOUT: '59000',
 			SUPERVISOR_DELTA: 'false',
 			SUPERVISOR_DELTA_REQUEST_TIMEOUT: '59000',
 			SUPERVISOR_DELTA_APPLY_TIMEOUT: '0',
@@ -335,7 +335,7 @@ describe('ApiBinder', () => {
 
 	before(async () => {
 		await initModels(components, '/config-apibinder.json');
-		previousLastFetch = TargetState.lastFetch;
+		previousLastFetch = TargetState.lastSuccessfulFetch;
 	});
 
 	after(async () => {
@@ -1,5 +1,4 @@
 import chai from 'chai';
-import chaiAsPromised from 'chai-as-promised';
 import sinonChai from 'sinon-chai';
 import chaiThings from 'chai-things';
 import chaiLike from 'chai-like';
@@ -14,9 +13,11 @@ import chaiLike from 'chai-like';
  * If unsure whether to add to global fixtures, refer to the chart above.
  * Also, avoid setting global mutable variables here.
  */
-export const mochaGlobalSetup = function () {
+export const mochaGlobalSetup = async function () {
 	console.log('Setting up global fixtures for tests...');
 
+	const { default: chaiAsPromised } = await import('chai-as-promised');
+
 	/* Setup chai assertion plugins */
 	chai.use(chaiAsPromised);
 	chai.use(sinonChai);
@@ -2399,5 +2399,19 @@ describe('compose/app', () => {
 			const [releaseLockStep] = expectSteps('releaseLock', steps, 1);
 			expect(releaseLockStep).to.have.property('appId').that.equals(1);
 		});
+
+		it('should infer a releaseLock step when removing an app', async () => {
+			const current = createApp({
+				services: [],
+				networks: [],
+			});
+
+			const steps = current.stepsToRemoveApp({
+				...defaultContext,
+				lock: mockLock,
+			});
+			const [releaseLockStep] = expectSteps('releaseLock', steps, 1);
+			expect(releaseLockStep).to.have.property('appId').that.equals(1);
+		});
 	});
 });