mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-03-15 00:36:35 +00:00
Compare commits
54 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
b8032edc04 | ||
|
175872b358 | ||
|
ae337a1dd7 | ||
|
bdbc6a4ba4 | ||
|
978652b292 | ||
|
7771c0e96b | ||
|
026dc0aed2 | ||
|
5ef6b054fd | ||
|
3cca2b7ecd | ||
|
3d8bd28f5a | ||
|
6d00be2093 | ||
|
f8bdb14335 | ||
|
c88cf6a259 | ||
|
906ce6dc0d | ||
|
49163e92a0 | ||
|
f67e45f432 | ||
|
91335051ac | ||
|
2dc9d275b1 | ||
|
b6f0ecba18 | ||
|
dd0253ff1f | ||
|
5936af37e7 | ||
|
341111f1f9 | ||
|
1fc242200f | ||
|
5c94c61b0a | ||
|
43426a4a26 | ||
|
c57622e226 | ||
|
5fca7c25bc | ||
|
e901c38df0 | ||
|
f99e19f8a9 | ||
|
f4b1acba89 | ||
|
88e821ed8e | ||
|
58824066e0 | ||
|
f71f98777c | ||
|
25e46574ab | ||
|
52081ba15e | ||
|
342a2d4dac | ||
|
e474a9d95d | ||
|
3a3889546d | ||
|
3fbd98e218 | ||
|
84b9d869e1 | ||
|
85fc5784bc | ||
|
55f22dbc0f | ||
|
ea594b18ab | ||
|
2637d997b6 | ||
|
bc306c1bc9 | ||
|
e416ad0daf | ||
|
75127c6074 | ||
|
51f1fb0f30 | ||
|
89ddfa6554 | ||
|
86b8576d14 | ||
|
92b26c7ae2 | ||
|
f198983ede | ||
|
1a4580554e | ||
|
8e6c0fcad7 |
2
.github/actions/finalize/action.yml
vendored
2
.github/actions/finalize/action.yml
vendored
@ -13,7 +13,7 @@ inputs:
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- uses: balena-io/deploy-to-balena-action@72b7652cd8b4b0b49376f60fe790eef9ba76e3f0 # v2.0.74
|
||||
- uses: balena-io/deploy-to-balena-action@3cb4217ab3347a885b4fcdc44d5f3a4153145633 # v2.0.92
|
||||
with:
|
||||
balena_token: ${{ fromJSON(inputs.secrets).BALENA_STAGING_TOKEN }}
|
||||
fleet: ${{ env.matrix_value }}
|
||||
|
2
.github/actions/publish/action.yml
vendored
2
.github/actions/publish/action.yml
vendored
@ -13,7 +13,7 @@ inputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- uses: balena-io/deploy-to-balena-action@72b7652cd8b4b0b49376f60fe790eef9ba76e3f0 # v2.0.74
|
||||
- uses: balena-io/deploy-to-balena-action@3cb4217ab3347a885b4fcdc44d5f3a4153145633 # v2.0.92
|
||||
with:
|
||||
balena_token: ${{ fromJSON(inputs.secrets).BALENA_STAGING_TOKEN }}
|
||||
fleet: ${{ env.matrix_value }}
|
||||
|
@ -1,3 +1,333 @@
|
||||
- commits:
|
||||
- subject: Remove GOT retries on state poll
|
||||
hash: ae337a1dd7743b0ee0a05c32a5ce01965c5bafef
|
||||
body: |
|
||||
The state poll already has retry implementation, making the GOT default
|
||||
unnecessary.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Ensure poll socket timeout is defined early
|
||||
hash: bdbc6a4ba4766f9466891497bc02bd33aff1d4c7
|
||||
body: |
|
||||
We have observed that even when setting the socket timeout on the
|
||||
state poll https request, the timeout is only applied once the socket is
|
||||
connected. This causes issues with Node's auto family selection (happy
|
||||
eyeballs), as the default https timeout is 5s which means that larger
|
||||
[auto select attempt timeout](https://nodejs.org/docs/latest-v22.x/api/net.html#netgetdefaultautoselectfamilyattempttimeout) may result in the socket timing out before all connection attempts have been tried.
|
||||
|
||||
This commit sets a different https Agent for state polling, with a
|
||||
timeout matching the `apiRequestTimeout` used for other request events.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.12.8
|
||||
title: ""
|
||||
date: 2025-03-12T14:50:33.204Z
|
||||
- commits:
|
||||
- subject: Release locks when removing apps
|
||||
hash: 026dc0aed29ce7d66cfdd8616d80d1f5daf3ad46
|
||||
body: |
|
||||
This prevents leftover locks that can prevent other operations from
|
||||
taking place.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.12.7
|
||||
title: ""
|
||||
date: 2025-03-06T19:11:18.704Z
|
||||
- commits:
|
||||
- subject: Log non-API errors during state poll
|
||||
hash: 6d00be20930398699da1006176dac1e81b2dbbd6
|
||||
body: >
|
||||
The supervisor was failing silently if an error happened while
|
||||
establishing the
|
||||
|
||||
connection (e.g. requesting the socket).
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Fix target poll healthcheck
|
||||
hash: f8bdb1433508dcaeff12a78d746256041ba1c414
|
||||
body: |
|
||||
The Target.lastFetch time compared when performing the healthcheck
|
||||
resets any time a poll is attempted no matter the outcome. This changes
|
||||
the behavior so the time is reset only on a successful poll
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.12.6
|
||||
title: ""
|
||||
date: 2025-03-04T14:25:06.565Z
|
||||
- commits:
|
||||
- subject: Decrease balenaCloud api request timeout from 15m to 59s
|
||||
hash: 49163e92a013250f72ca7231e11945b465c4dd45
|
||||
body: |
|
||||
This was mistakenly increased due to confusion between the timeout for
|
||||
requests to the supervisor's api vs the timeout for requests from the
|
||||
supervisor to the balenaCloud api. This separates the two configs and
|
||||
documents the difference between the timeouts whilst also decreasing
|
||||
the timeout for balenaCloud api requests to the correct/expected value
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Pagan Gazzard
|
||||
nested: []
|
||||
version: 16.12.5
|
||||
title: ""
|
||||
date: 2025-03-04T13:35:26.801Z
|
||||
- commits:
|
||||
- subject: Don't revert to regular pull if delta server 401
|
||||
hash: 2dc9d275b15a0802264bcd49e2f0dddbbadd2225
|
||||
body: |
|
||||
If the Supervisor receives a 401 Unauthorized from the delta server
|
||||
when requesting a delta image location, we should surface the error
|
||||
instead of falling back to a regular pull immediately, as there could
|
||||
be an issue with the delta auth token, which refreshes after
|
||||
DELTA_TOKEN_TIMEOUT (10min), or some other edge case.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
Signed-off-by: Christina Ying Wang <christina@balena.io>
|
||||
signed-off-by: Christina Ying Wang <christina@balena.io>
|
||||
author: Christina Ying Wang
|
||||
nested: []
|
||||
version: 16.12.4
|
||||
title: ""
|
||||
date: 2025-03-03T13:42:18.045Z
|
||||
- commits:
|
||||
- subject: Retry DELTA_APPLY_RETRY_COUNT (3) times during delta apply fail before
|
||||
reverting to regular pull
|
||||
hash: 341111f1f94cd9f17fd7be9b6f21e3bc22c9ad3a
|
||||
body: >
|
||||
This prevents an image download error loop where the delta image on the
|
||||
delta server is present,
|
||||
|
||||
but some aspect of the delta image or the base image on the device does
|
||||
not match up, causing
|
||||
|
||||
the delta to fail to be applied to the base image.
|
||||
|
||||
|
||||
Delta apply errors don't raise status codes as they are thrown from the
|
||||
Engine (although they should),
|
||||
|
||||
so if an error with a status code is raised during this time, throw an
|
||||
error to the handler
|
||||
|
||||
indicating that the delta should be retried until success. Errors with
|
||||
status codes raised during
|
||||
|
||||
this time are largely network related, so falling back to a regular pull
|
||||
won't improve anything.
|
||||
|
||||
|
||||
Upon delta apply errors exceeding DELTA_APPLY_RETRY_COUNT, revert to a
|
||||
regular pull.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
Signed-off-by: Christina Ying Wang <christina@balena.io>
|
||||
signed-off-by: Christina Ying Wang <christina@balena.io>
|
||||
author: Christina Ying Wang
|
||||
nested: []
|
||||
- subject: Revert to regular pull immediately on delta server failure (code 400s)
|
||||
hash: 1fc242200f78e4219aafc5bb91de8cf0916236af
|
||||
body: >
|
||||
If the delta server responds immediately with HTTP 4xx upon requesting a
|
||||
delta image,
|
||||
|
||||
this means the server is not able to supply the resource, so fall back
|
||||
to a regular pull
|
||||
|
||||
immediately.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
Signed-off-by: Christina Ying Wang <christina@balena.io>
|
||||
signed-off-by: Christina Ying Wang <christina@balena.io>
|
||||
author: Christina Ying Wang
|
||||
nested: []
|
||||
version: 16.12.3
|
||||
title: ""
|
||||
date: 2025-02-19T20:51:53.085Z
|
||||
- commits:
|
||||
- subject: Update balena-io/deploy-to-balena-action action to v2.0.92
|
||||
hash: c57622e2264e41078e907d6ba8de9d5206bb6293
|
||||
body: |
|
||||
Update balena-io/deploy-to-balena-action from 2.0.74 to 2.0.92
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: balena-renovate[bot]
|
||||
nested: []
|
||||
version: 16.12.2
|
||||
title: ""
|
||||
date: 2025-02-11T01:04:22.736Z
|
||||
- commits:
|
||||
- subject: Pin io-ts version to v2.2.20
|
||||
hash: 88e821ed8e36e10d6429dc31950b5aeed968aa3f
|
||||
body: |
|
||||
gcanti/io-ts#705 fixes an issue with io-ts and non-enumerable
|
||||
properties, but that results in objects with invalid properties to get
|
||||
removed during `decode`, which breaks our validation tests.
|
||||
|
||||
Need to figure out what is the right behavior for us
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Update network-manager to v1
|
||||
hash: f71f98777cbf7198745f1dcb8467b8cc62719d85
|
||||
body: ""
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Update balena-request and balena-register-device
|
||||
hash: 52081ba15e84be794a906d5cbccc343b24748bba
|
||||
body: ""
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Update pinejs-client-request to v8
|
||||
hash: 342a2d4dac737274ab13a8b05eac0f1f036a5075
|
||||
body: ""
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Update chai utility modules
|
||||
hash: 3a3889546d8546793914bc2b5da10e202ebb14b1
|
||||
body: |
|
||||
Updating chai will be done in a future PR as it requires overhauling all
|
||||
tests since chai is now ESM
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.12.1
|
||||
title: ""
|
||||
date: 2025-02-10T22:51:51.632Z
|
||||
- commits:
|
||||
- subject: Update contrato to v0.12.0
|
||||
hash: 85fc5784bcd187d086bffbd0c2167ce9eb34650f
|
||||
body: ""
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Update alpine base image to 3.21
|
||||
hash: 55f22dbc0f4792033b6253af89c6adde6a727ab0
|
||||
body: |
|
||||
This allows to update Node to v22 on production supervisor images
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Update Node support to v22
|
||||
hash: ea594b18abb6b82f498071e50f71422dedb5b280
|
||||
body: |
|
||||
Updates @types/node and expands module support to v22.
|
||||
Support for v20 will be removed on a future version.
|
||||
footer:
|
||||
Change-type: minor
|
||||
change-type: minor
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.12.0
|
||||
title: ""
|
||||
date: 2025-01-20T22:14:35.646Z
|
||||
- commits:
|
||||
- subject: Add support for `io.balena.update.requires-reboot`
|
||||
hash: e416ad0daf61fba14cd8c2012c5b2f66d8fb5f4a
|
||||
body: >
|
||||
This label can be used by user services to indicate that a reboot is
|
||||
|
||||
required after the install of a service in order to fully apply an
|
||||
update.
|
||||
footer:
|
||||
Change-type: minor
|
||||
change-type: minor
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Move reboot breadcrumb check to device-state
|
||||
hash: 75127c6074531fd20199ed07d6860687b4105cfb
|
||||
body: |
|
||||
This was on device-config before, but we'll need to set the reboot
|
||||
breadcrumb from the application-manager as well when we introduce
|
||||
`requires-reboot` as a label.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
- subject: Refactor device-config as part of device-state
|
||||
hash: 51f1fb0f30e04ece6a00d2d8b9420b49703a2fde
|
||||
body: |
|
||||
Move the device-config module to the device-state folder and export only
|
||||
those functions that are needed elsewhere in the codebase
|
||||
|
||||
This moves us closer to making the device-state module the only way to
|
||||
modify application and configuration.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.11.0
|
||||
title: ""
|
||||
date: 2025-01-14T18:15:55.879Z
|
||||
- commits:
|
||||
- subject: Update systeminformation to v5.23.8 [SECURITY]
|
||||
hash: 92b26c7ae2d8d329be18806abe24ab312e92db68
|
||||
body: |
|
||||
Update systeminformation from 5.22.7 to 5.23.8
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: balena-renovate[bot]
|
||||
nested: []
|
||||
version: 16.10.3
|
||||
title: ""
|
||||
date: 2024-12-20T20:43:23.372Z
|
||||
- commits:
|
||||
- subject: Wait for service dependencies to be running
|
||||
hash: 8e6c0fcad729b56e386ac27754c48c97517e293a
|
||||
body: |
|
||||
This fixes a regression where dependencies would only be started in
|
||||
order and would start the dependent service if its dependency had been
|
||||
started at some point in the past, regardless of the running condition.
|
||||
|
||||
This makes the behavior more consistent with docker compose where the
|
||||
[dependency needs to be
|
||||
running or healthy](https://github.com/docker/compose/blob/69a83d1303a103d82b05d512baf273244b4dbd94/pkg/compose/convergence.go#L441) for the service to be started.
|
||||
footer:
|
||||
Change-type: patch
|
||||
change-type: patch
|
||||
author: Felipe Lalanne
|
||||
nested: []
|
||||
version: 16.10.2
|
||||
title: ""
|
||||
date: 2024-12-18T18:48:52.008Z
|
||||
- commits:
|
||||
- subject: Specify `/tmp/balena|resin` directories as necessary
|
||||
hash: dc740a159b9b299b85885bc8ab65e97cc4d356a3
|
||||
|
71
CHANGELOG.md
71
CHANGELOG.md
@ -4,6 +4,77 @@ All notable changes to this project will be documented in this file
|
||||
automatically by Versionist. DO NOT EDIT THIS FILE MANUALLY!
|
||||
This project adheres to [Semantic Versioning](http://semver.org/).
|
||||
|
||||
# v16.12.8
|
||||
## (2025-03-12)
|
||||
|
||||
* Remove GOT retries on state poll [Felipe Lalanne]
|
||||
* Ensure poll socket timeout is defined early [Felipe Lalanne]
|
||||
|
||||
# v16.12.7
|
||||
## (2025-03-06)
|
||||
|
||||
* Release locks when removing apps [Felipe Lalanne]
|
||||
|
||||
# v16.12.6
|
||||
## (2025-03-04)
|
||||
|
||||
* Log non-API errors during state poll [Felipe Lalanne]
|
||||
* Fix target poll healthcheck [Felipe Lalanne]
|
||||
|
||||
# v16.12.5
|
||||
## (2025-03-04)
|
||||
|
||||
* Decrease balenaCloud api request timeout from 15m to 59s [Pagan Gazzard]
|
||||
|
||||
# v16.12.4
|
||||
## (2025-03-03)
|
||||
|
||||
* Don't revert to regular pull if delta server 401 [Christina Ying Wang]
|
||||
|
||||
# v16.12.3
|
||||
## (2025-02-19)
|
||||
|
||||
* Retry DELTA_APPLY_RETRY_COUNT (3) times during delta apply fail before reverting to regular pull [Christina Ying Wang]
|
||||
* Revert to regular pull immediately on delta server failure (code 400s) [Christina Ying Wang]
|
||||
|
||||
# v16.12.2
|
||||
## (2025-02-11)
|
||||
|
||||
* Update balena-io/deploy-to-balena-action action to v2.0.92 [balena-renovate[bot]]
|
||||
|
||||
# v16.12.1
|
||||
## (2025-02-10)
|
||||
|
||||
* Pin io-ts version to v2.2.20 [Felipe Lalanne]
|
||||
* Update network-manager to v1 [Felipe Lalanne]
|
||||
* Update balena-request and balena-register-device [Felipe Lalanne]
|
||||
* Update pinejs-client-request to v8 [Felipe Lalanne]
|
||||
* Update chai utility modules [Felipe Lalanne]
|
||||
|
||||
# v16.12.0
|
||||
## (2025-01-20)
|
||||
|
||||
* Update contrato to v0.12.0 [Felipe Lalanne]
|
||||
* Update alpine base image to 3.21 [Felipe Lalanne]
|
||||
* Update Node support to v22 [Felipe Lalanne]
|
||||
|
||||
# v16.11.0
|
||||
## (2025-01-14)
|
||||
|
||||
* Add support for `io.balena.update.requires-reboot` [Felipe Lalanne]
|
||||
* Move reboot breadcrumb check to device-state [Felipe Lalanne]
|
||||
* Refactor device-config as part of device-state [Felipe Lalanne]
|
||||
|
||||
# v16.10.3
|
||||
## (2024-12-20)
|
||||
|
||||
* Update systeminformation to v5.23.8 [SECURITY] [balena-renovate[bot]]
|
||||
|
||||
# v16.10.2
|
||||
## (2024-12-18)
|
||||
|
||||
* Wait for service dependencies to be running [Felipe Lalanne]
|
||||
|
||||
# v16.10.1
|
||||
## (2024-12-11)
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
ARG ARCH=%%BALENA_ARCH%%
|
||||
ARG FATRW_VERSION=0.2.21
|
||||
ARG NODE="nodejs~=20"
|
||||
ARG NODE="nodejs~=22"
|
||||
ARG NPM="npm~=10"
|
||||
ARG ALPINE_VERSION="3.19"
|
||||
ARG ALPINE_VERSION="3.21"
|
||||
|
||||
###################################################
|
||||
# Build the supervisor dependencies
|
||||
|
@ -2,6 +2,6 @@ name: balena-supervisor
|
||||
description: 'Balena Supervisor: balena''s agent on devices.'
|
||||
joinable: false
|
||||
type: sw.application
|
||||
version: 16.10.1
|
||||
version: 16.12.8
|
||||
provides:
|
||||
- slug: sw.compose.long-volume-syntax
|
||||
|
2441
package-lock.json
generated
2441
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
76
package.json
76
package.json
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "balena-supervisor",
|
||||
"description": "This is balena's Supervisor, a program that runs on IoT devices and has the task of running user Apps (which are Docker containers), and updating them as the balena API informs it to.",
|
||||
"version": "16.10.1",
|
||||
"version": "16.12.8",
|
||||
"license": "Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@ -35,17 +35,18 @@
|
||||
"sqlite3": "^5.1.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20 <21",
|
||||
"node": ">=20 <23",
|
||||
"npm": ">=10"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@balena/compose": "^3.2.1",
|
||||
"@balena/contrato": "^0.9.4",
|
||||
"@balena/compose": "^6.0.0",
|
||||
"@balena/contrato": "^0.12.0",
|
||||
"@balena/es-version": "^1.0.3",
|
||||
"@balena/lint": "^8.0.2",
|
||||
"@balena/sbvr-types": "^9.1.0",
|
||||
"@types/bluebird": "^3.5.42",
|
||||
"@types/chai": "^4.3.14",
|
||||
"@types/chai-as-promised": "^7.1.8",
|
||||
"@types/chai": "^4.3.20",
|
||||
"@types/chai-as-promised": "^8.0.1",
|
||||
"@types/chai-like": "^1.1.3",
|
||||
"@types/chai-things": "0.0.38",
|
||||
"@types/common-tags": "^1.8.4",
|
||||
@ -57,7 +58,7 @@
|
||||
"@types/memoizee": "^0.4.11",
|
||||
"@types/mocha": "^10.0.6",
|
||||
"@types/morgan": "^1.9.9",
|
||||
"@types/node": "^20.12.7",
|
||||
"@types/node": "^22.10.6",
|
||||
"@types/request": "^2.48.12",
|
||||
"@types/rewire": "^2.5.30",
|
||||
"@types/rwlock": "^5.0.6",
|
||||
@ -70,71 +71,72 @@
|
||||
"@types/webpack": "^5.28.5",
|
||||
"@types/yargs": "^17.0.32",
|
||||
"balena-auth": "^6.0.1",
|
||||
"balena-register-device": "^9.0.2",
|
||||
"balena-request": "^13.3.1",
|
||||
"balena-register-device": "^9.0.4",
|
||||
"balena-request": "^14.0.1",
|
||||
"blinking": "^1.0.1",
|
||||
"bluebird": "^3.7.2",
|
||||
"chai": "^4.3.4",
|
||||
"chai-as-promised": "^7.1.1",
|
||||
"chai-like": "^1.1.1",
|
||||
"chai": "^4.5.0",
|
||||
"chai-as-promised": "^8.0.1",
|
||||
"chai-like": "^1.1.3",
|
||||
"chai-things": "^0.2.0",
|
||||
"chokidar": "^3.5.1",
|
||||
"chokidar": "^4.0.3",
|
||||
"common-tags": "^1.8.0",
|
||||
"copy-webpack-plugin": "^12.0.0",
|
||||
"deep-object-diff": "^1.1.0",
|
||||
"deep-object-diff": "1.1.0",
|
||||
"docker-delta": "^4.1.0",
|
||||
"docker-progress": "^5.2.3",
|
||||
"docker-progress": "^5.2.4",
|
||||
"dockerode": "^4.0.2",
|
||||
"duration-js": "^4.0.0",
|
||||
"express": "^4.17.1",
|
||||
"express": "^4.21.2",
|
||||
"fork-ts-checker-webpack-plugin": "^9.0.2",
|
||||
"fp-ts": "^2.16.5",
|
||||
"got": "14.4.1",
|
||||
"husky": "^9.0.11",
|
||||
"io-ts": "^2.2.20",
|
||||
"got": "^14.4.6",
|
||||
"husky": "^9.1.7",
|
||||
"io-ts": "2.2.20",
|
||||
"io-ts-reporters": "^2.0.1",
|
||||
"json-mask": "^2.0.0",
|
||||
"JSONStream": "^1.3.5",
|
||||
"knex": "^3.1.0",
|
||||
"lint-staged": "^15.2.2",
|
||||
"lint-staged": "^15.4.3",
|
||||
"livepush": "^3.5.1",
|
||||
"lodash": "^4.17.21",
|
||||
"mdns-resolver": "1.1.0",
|
||||
"memoizee": "^0.4.14",
|
||||
"mocha": "^10.4.0",
|
||||
"mocha-pod": "^2.0.5",
|
||||
"mocha-pod": "^2.0.10",
|
||||
"morgan": "^1.10.0",
|
||||
"network-checker": "^0.1.1",
|
||||
"nock": "^13.1.2",
|
||||
"node-loader": "^2.0.0",
|
||||
"nodemon": "^3.1.0",
|
||||
"pinejs-client-request": "^7.3.5",
|
||||
"network-checker": "^1.0.2",
|
||||
"nock": "^13.5.6",
|
||||
"node-loader": "^2.1.0",
|
||||
"nodemon": "^3.1.9",
|
||||
"pinejs-client-core": "^7.2.0",
|
||||
"pinejs-client-request": "^8.0.1",
|
||||
"pretty-ms": "^7.0.1",
|
||||
"request": "^2.88.2",
|
||||
"resumable-request": "^2.0.1",
|
||||
"rewire": "^7.0.0",
|
||||
"rimraf": "^5.0.0",
|
||||
"rimraf": "^5.0.10",
|
||||
"rwlock": "^5.0.0",
|
||||
"semver": "7.6.3",
|
||||
"shell-quote": "^1.7.2",
|
||||
"shell-quote": "^1.8.2",
|
||||
"sinon": "^18.0.0",
|
||||
"sinon-chai": "^3.7.0",
|
||||
"strict-event-emitter-types": "^2.0.0",
|
||||
"supertest": "^7.0.0",
|
||||
"systeminformation": "^5.22.7",
|
||||
"systeminformation": "^5.25.11",
|
||||
"tar-stream": "^3.1.7",
|
||||
"terser-webpack-plugin": "^5.3.6",
|
||||
"ts-loader": "^9.4.0",
|
||||
"terser-webpack-plugin": "^5.3.11",
|
||||
"ts-loader": "^9.5.2",
|
||||
"ts-node": "^10.0.0",
|
||||
"tsconfig-paths": "^4.1.0",
|
||||
"tsconfig-paths": "^4.2.0",
|
||||
"typed-error": "^3.2.1",
|
||||
"typescript": "^5.5.4",
|
||||
"webpack": "^5.74.0",
|
||||
"webpack-cli": "^5.0.0",
|
||||
"winston": "^3.3.3",
|
||||
"typescript": "^5.7.3",
|
||||
"webpack": "^5.97.1",
|
||||
"webpack-cli": "^5.1.4",
|
||||
"winston": "^3.17.0",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"versionist": {
|
||||
"publishedAt": "2024-12-11T00:28:17.804Z"
|
||||
"publishedAt": "2025-03-12T14:50:33.763Z"
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,6 @@ import _ from 'lodash';
|
||||
import type { PinejsClientRequest } from 'pinejs-client-request';
|
||||
|
||||
import * as config from '../config';
|
||||
import * as deviceConfig from '../device-config';
|
||||
import * as eventTracker from '../event-tracker';
|
||||
import { loadBackupFromMigration } from '../lib/migration';
|
||||
|
||||
@ -64,7 +63,7 @@ export async function healthcheck() {
|
||||
}
|
||||
|
||||
// Check last time target state has been polled
|
||||
const timeSinceLastFetch = process.hrtime(TargetState.lastFetch);
|
||||
const timeSinceLastFetch = process.hrtime(TargetState.lastSuccessfulFetch);
|
||||
const timeSinceLastFetchMs =
|
||||
timeSinceLastFetch[0] * 1000 + timeSinceLastFetch[1] / 1e6;
|
||||
|
||||
@ -332,10 +331,10 @@ async function reportInitialEnv(
|
||||
);
|
||||
}
|
||||
|
||||
const defaultConfig = deviceConfig.getDefaults();
|
||||
const defaultConfig = deviceState.getDefaultConfig();
|
||||
|
||||
const currentConfig = await deviceConfig.getCurrent();
|
||||
const targetConfig = deviceConfig.formatConfigKeys(targetConfigUnformatted);
|
||||
const currentConfig = await deviceState.getCurrentConfig();
|
||||
const targetConfig = deviceState.formatConfigKeys(targetConfigUnformatted);
|
||||
|
||||
if (!currentConfig) {
|
||||
throw new InternalInconsistencyError(
|
||||
|
@ -3,6 +3,7 @@ import url from 'url';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
import Bluebird from 'bluebird';
|
||||
import type StrictEventEmitter from 'strict-event-emitter-types';
|
||||
import { Agent } from 'https';
|
||||
|
||||
import type { TargetState } from '../types/state';
|
||||
import { InternalInconsistencyError } from '../lib/errors';
|
||||
@ -87,7 +88,8 @@ const emitTargetState = (
|
||||
* We set a value rather than being undeclared because having it undefined
|
||||
* adds more overhead to dealing with this value without any benefits.
|
||||
*/
|
||||
export let lastFetch: ReturnType<typeof process.hrtime> = process.hrtime();
|
||||
export let lastSuccessfulFetch: ReturnType<typeof process.hrtime> =
|
||||
process.hrtime();
|
||||
|
||||
/**
|
||||
* Attempts to update the target state
|
||||
@ -101,11 +103,11 @@ export const update = async (
|
||||
): Promise<void> => {
|
||||
await config.initialized();
|
||||
return Bluebird.using(lockGetTarget(), async () => {
|
||||
const { uuid, apiEndpoint, apiTimeout, deviceApiKey } =
|
||||
const { uuid, apiEndpoint, apiRequestTimeout, deviceApiKey } =
|
||||
await config.getMany([
|
||||
'uuid',
|
||||
'apiEndpoint',
|
||||
'apiTimeout',
|
||||
'apiRequestTimeout',
|
||||
'deviceApiKey',
|
||||
]);
|
||||
|
||||
@ -119,6 +121,13 @@ export const update = async (
|
||||
const got = await getGotInstance();
|
||||
|
||||
const { statusCode, headers, body } = await got(endpoint, {
|
||||
retry: { limit: 0 },
|
||||
agent: {
|
||||
https: new Agent({
|
||||
keepAlive: true,
|
||||
timeout: apiRequestTimeout,
|
||||
}),
|
||||
},
|
||||
headers: {
|
||||
Authorization: `Bearer ${deviceApiKey}`,
|
||||
'If-None-Match': cache?.etag,
|
||||
@ -126,12 +135,12 @@ export const update = async (
|
||||
timeout: {
|
||||
// TODO: We use the same default timeout for all of these in order to have a timeout generally
|
||||
// but it would probably make sense to tune them individually
|
||||
lookup: apiTimeout,
|
||||
connect: apiTimeout,
|
||||
secureConnect: apiTimeout,
|
||||
socket: apiTimeout,
|
||||
send: apiTimeout,
|
||||
response: apiTimeout,
|
||||
lookup: apiRequestTimeout,
|
||||
connect: apiRequestTimeout,
|
||||
secureConnect: apiRequestTimeout,
|
||||
socket: apiRequestTimeout,
|
||||
send: apiRequestTimeout,
|
||||
response: apiRequestTimeout,
|
||||
},
|
||||
});
|
||||
|
||||
@ -154,8 +163,6 @@ export const update = async (
|
||||
|
||||
// Emit the target state and update the cache
|
||||
cache.emitted = emitTargetState(cache, force, isFromApi);
|
||||
}).finally(() => {
|
||||
lastFetch = process.hrtime();
|
||||
});
|
||||
};
|
||||
|
||||
@ -188,7 +195,11 @@ const poll = async (
|
||||
await update();
|
||||
// Reset fetchErrors because we successfully updated
|
||||
fetchErrors = 0;
|
||||
} catch {
|
||||
lastSuccessfulFetch = process.hrtime();
|
||||
} catch (e) {
|
||||
if (!(e instanceof ApiResponseError)) {
|
||||
log.error('Target state poll failed', e);
|
||||
}
|
||||
// Exponential back off if request fails
|
||||
pollInterval = Math.min(appUpdatePollInterval, 15000 * 2 ** fetchErrors);
|
||||
++fetchErrors;
|
||||
|
@ -41,14 +41,17 @@ export let stateReportErrors = 0;
|
||||
type StateReportOpts = {
|
||||
[key in keyof Pick<
|
||||
config.ConfigMap<SchemaTypeKey>,
|
||||
'apiEndpoint' | 'apiTimeout' | 'deviceApiKey' | 'appUpdatePollInterval'
|
||||
| 'apiEndpoint'
|
||||
| 'apiRequestTimeout'
|
||||
| 'deviceApiKey'
|
||||
| 'appUpdatePollInterval'
|
||||
>]: SchemaReturn<key>;
|
||||
};
|
||||
|
||||
type StateReport = { body: Partial<DeviceState>; opts: StateReportOpts };
|
||||
|
||||
async function report({ body, opts }: StateReport) {
|
||||
const { apiEndpoint, apiTimeout, deviceApiKey } = opts;
|
||||
const { apiEndpoint, apiRequestTimeout, deviceApiKey } = opts;
|
||||
|
||||
if (!apiEndpoint) {
|
||||
throw new InternalInconsistencyError(
|
||||
@ -69,7 +72,7 @@ async function report({ body, opts }: StateReport) {
|
||||
|
||||
const [{ statusCode, body: statusMessage, headers }] = await request
|
||||
.patchAsync(endpoint, params)
|
||||
.timeout(apiTimeout);
|
||||
.timeout(apiRequestTimeout);
|
||||
|
||||
if (statusCode < 200 || statusCode >= 300) {
|
||||
throw new StatusError(
|
||||
@ -203,7 +206,7 @@ export async function startReporting() {
|
||||
// Get configs needed to make a report
|
||||
const reportConfigs = (await config.getMany([
|
||||
'apiEndpoint',
|
||||
'apiTimeout',
|
||||
'apiRequestTimeout',
|
||||
'deviceApiKey',
|
||||
'appUpdatePollInterval',
|
||||
])) as StateReportOpts;
|
||||
|
@ -247,6 +247,16 @@ class AppImpl implements App {
|
||||
}
|
||||
}
|
||||
|
||||
// Release locks (if any) for all services before settling state
|
||||
if (state.lock || state.hasLeftoverLocks) {
|
||||
return [
|
||||
generateStep('releaseLock', {
|
||||
appId: this.appId,
|
||||
lock: state.lock,
|
||||
}),
|
||||
];
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
@ -654,6 +664,7 @@ class AppImpl implements App {
|
||||
context.targetApp,
|
||||
needsDownload,
|
||||
servicesLocked,
|
||||
context.rebootBreadcrumbSet,
|
||||
context.appsToLock,
|
||||
context.availableImages,
|
||||
context.networkPairs,
|
||||
@ -682,6 +693,8 @@ class AppImpl implements App {
|
||||
context.appsToLock,
|
||||
context.targetApp.services,
|
||||
servicesLocked,
|
||||
context.rebootBreadcrumbSet,
|
||||
context.bootTime,
|
||||
);
|
||||
}
|
||||
|
||||
@ -761,6 +774,8 @@ class AppImpl implements App {
|
||||
appsToLock: AppsToLockMap,
|
||||
targetServices: Service[],
|
||||
servicesLocked: boolean,
|
||||
rebootBreadcrumbSet: boolean,
|
||||
bootTime: Date,
|
||||
): CompositionStep[] {
|
||||
// Update container metadata if service release has changed
|
||||
if (current.commit !== target.commit) {
|
||||
@ -774,16 +789,38 @@ class AppImpl implements App {
|
||||
return [];
|
||||
}
|
||||
} else if (target.config.running !== current.config.running) {
|
||||
// Take lock for all services before starting/stopping container
|
||||
if (!servicesLocked) {
|
||||
this.services.concat(targetServices).forEach((s) => {
|
||||
appsToLock[target.appId].add(s.serviceName);
|
||||
});
|
||||
return [];
|
||||
}
|
||||
if (target.config.running) {
|
||||
// if the container has a reboot
|
||||
// required label and the boot time is before the creation time, then
|
||||
// return a 'noop' to ensure a reboot happens before starting the container
|
||||
const requiresReboot =
|
||||
checkTruthy(
|
||||
target.config.labels?.['io.balena.update.requires-reboot'],
|
||||
) &&
|
||||
current.createdAt != null &&
|
||||
current.createdAt > bootTime;
|
||||
|
||||
if (requiresReboot && rebootBreadcrumbSet) {
|
||||
// Do not return a noop to allow locks to be released by the
|
||||
// app module
|
||||
return [];
|
||||
} else if (requiresReboot) {
|
||||
return [
|
||||
generateStep('requireReboot', {
|
||||
serviceName: target.serviceName,
|
||||
}),
|
||||
];
|
||||
}
|
||||
|
||||
return [generateStep('start', { target })];
|
||||
} else {
|
||||
// Take lock for all services before stopping container
|
||||
if (!servicesLocked) {
|
||||
this.services.concat(targetServices).forEach((s) => {
|
||||
appsToLock[target.appId].add(s.serviceName);
|
||||
});
|
||||
return [];
|
||||
}
|
||||
return [generateStep('stop', { current })];
|
||||
}
|
||||
} else {
|
||||
@ -796,6 +833,7 @@ class AppImpl implements App {
|
||||
targetApp: App,
|
||||
needsDownload: boolean,
|
||||
servicesLocked: boolean,
|
||||
rebootBreadcrumbSet: boolean,
|
||||
appsToLock: AppsToLockMap,
|
||||
availableImages: UpdateState['availableImages'],
|
||||
networkPairs: Array<ChangingPair<Network>>,
|
||||
@ -813,24 +851,30 @@ class AppImpl implements App {
|
||||
serviceName: target.serviceName,
|
||||
}),
|
||||
];
|
||||
} else if (
|
||||
target != null &&
|
||||
this.dependenciesMetForServiceStart(
|
||||
target,
|
||||
targetApp,
|
||||
availableImages,
|
||||
networkPairs,
|
||||
volumePairs,
|
||||
servicePairs,
|
||||
)
|
||||
) {
|
||||
if (!servicesLocked) {
|
||||
this.services
|
||||
.concat(targetApp.services)
|
||||
.forEach((svc) => appsToLock[target.appId].add(svc.serviceName));
|
||||
return [];
|
||||
} else if (target != null) {
|
||||
if (
|
||||
this.dependenciesMetForServiceStart(
|
||||
target,
|
||||
targetApp,
|
||||
availableImages,
|
||||
networkPairs,
|
||||
volumePairs,
|
||||
servicePairs,
|
||||
)
|
||||
) {
|
||||
if (!servicesLocked) {
|
||||
this.services
|
||||
.concat(targetApp.services)
|
||||
.forEach((svc) => appsToLock[target.appId].add(svc.serviceName));
|
||||
return [];
|
||||
}
|
||||
return [generateStep('start', { target })];
|
||||
} else {
|
||||
// Wait for dependencies to be started unless there is a
|
||||
// reboot breadcrumb set, in which case we need to allow the state
|
||||
// to settle for the reboot to happen
|
||||
return rebootBreadcrumbSet ? [] : [generateStep('noop', {})];
|
||||
}
|
||||
return [generateStep('start', { target })];
|
||||
} else {
|
||||
return [];
|
||||
}
|
||||
@ -881,7 +925,7 @@ class AppImpl implements App {
|
||||
// different to a dependency which is in the servicePairs below, as these
|
||||
// are services which are changing). We could have a dependency which is
|
||||
// starting up, but is not yet running.
|
||||
const depInstallingButNotRunning = _.some(targetApp.services, (svc) => {
|
||||
const depInstallingButNotRunning = _.some(this.services, (svc) => {
|
||||
if (target.dependsOn?.includes(svc.serviceName)) {
|
||||
if (!svc.config.running) {
|
||||
return true;
|
||||
@ -893,11 +937,11 @@ class AppImpl implements App {
|
||||
return false;
|
||||
}
|
||||
|
||||
const depedencyUnmet = _.some(target.dependsOn, (dep) =>
|
||||
const dependencyUnmet = _.some(target.dependsOn, (dep) =>
|
||||
_.some(servicePairs, (pair) => pair.target?.serviceName === dep),
|
||||
);
|
||||
|
||||
if (depedencyUnmet) {
|
||||
if (dependencyUnmet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -40,6 +40,8 @@ import type {
|
||||
Image,
|
||||
InstancedAppState,
|
||||
} from './types';
|
||||
import { isRebootBreadcrumbSet } from '../lib/reboot';
|
||||
import { getBootTime } from '../lib/fs-utils';
|
||||
|
||||
type ApplicationManagerEventEmitter = StrictEventEmitter<
|
||||
EventEmitter,
|
||||
@ -127,6 +129,7 @@ export async function getRequiredSteps(
|
||||
config.getMany(['localMode', 'delta']),
|
||||
]);
|
||||
const containerIdsByAppId = getAppContainerIds(currentApps);
|
||||
const rebootBreadcrumbSet = await isRebootBreadcrumbSet();
|
||||
|
||||
// Local mode sets the image and volume retention only
|
||||
// if not explicitly set by the caller
|
||||
@ -149,6 +152,7 @@ export async function getRequiredSteps(
|
||||
availableImages,
|
||||
containerIdsByAppId,
|
||||
appLocks: lockRegistry,
|
||||
rebootBreadcrumbSet,
|
||||
});
|
||||
}
|
||||
|
||||
@ -161,6 +165,7 @@ interface InferNextOpts {
|
||||
availableImages: UpdateState['availableImages'];
|
||||
containerIdsByAppId: { [appId: number]: UpdateState['containerIds'] };
|
||||
appLocks: LockRegistry;
|
||||
rebootBreadcrumbSet: boolean;
|
||||
}
|
||||
|
||||
// Calculate the required steps from the current to the target state
|
||||
@ -176,6 +181,7 @@ export async function inferNextSteps(
|
||||
availableImages = [],
|
||||
containerIdsByAppId = {},
|
||||
appLocks = {},
|
||||
rebootBreadcrumbSet = false,
|
||||
}: Partial<InferNextOpts>,
|
||||
) {
|
||||
const currentAppIds = Object.keys(currentApps).map((i) => parseInt(i, 10));
|
||||
@ -184,6 +190,7 @@ export async function inferNextSteps(
|
||||
const withLeftoverLocks = await Promise.all(
|
||||
currentAppIds.map((id) => hasLeftoverLocks(id)),
|
||||
);
|
||||
const bootTime = getBootTime();
|
||||
|
||||
let steps: CompositionStep[] = [];
|
||||
|
||||
@ -245,6 +252,8 @@ export async function inferNextSteps(
|
||||
force,
|
||||
lock: appLocks[id],
|
||||
hasLeftoverLocks: withLeftoverLocks[id],
|
||||
rebootBreadcrumbSet,
|
||||
bootTime,
|
||||
},
|
||||
targetApps[id],
|
||||
),
|
||||
@ -261,6 +270,8 @@ export async function inferNextSteps(
|
||||
force,
|
||||
lock: appLocks[id],
|
||||
hasLeftoverLocks: withLeftoverLocks[id],
|
||||
rebootBreadcrumbSet,
|
||||
bootTime,
|
||||
}),
|
||||
);
|
||||
}
|
||||
@ -287,6 +298,8 @@ export async function inferNextSteps(
|
||||
force,
|
||||
lock: appLocks[id],
|
||||
hasLeftoverLocks: false,
|
||||
rebootBreadcrumbSet,
|
||||
bootTime,
|
||||
},
|
||||
targetApps[id],
|
||||
),
|
||||
|
@ -6,6 +6,7 @@ import * as networkManager from './network-manager';
|
||||
import * as volumeManager from './volume-manager';
|
||||
import * as commitStore from './commit';
|
||||
import { Lockable, cleanLocksForApp } from '../lib/update-lock';
|
||||
import { setRebootBreadcrumb } from '../lib/reboot';
|
||||
import type { DeviceLegacyReport } from '../types/state';
|
||||
import type { CompositionStepAction, CompositionStepT } from './types';
|
||||
import type { Lock } from '../lib/update-lock';
|
||||
@ -157,6 +158,9 @@ export function getExecutors(app: { callbacks: CompositionCallbacks }) {
|
||||
// Clean up any remaining locks
|
||||
await cleanLocksForApp(step.appId);
|
||||
},
|
||||
requireReboot: async (step) => {
|
||||
await setRebootBreadcrumb({ serviceName: step.serviceName });
|
||||
},
|
||||
};
|
||||
|
||||
return executors;
|
||||
|
@ -19,7 +19,7 @@ import {
|
||||
isStatusError,
|
||||
} from '../lib/errors';
|
||||
import * as LogTypes from '../lib/log-types';
|
||||
import { checkInt, isValidDeviceName } from '../lib/validation';
|
||||
import { checkInt, isValidDeviceName, checkTruthy } from '../lib/validation';
|
||||
import { Service } from './service';
|
||||
import type { ServiceStatus } from './types';
|
||||
import { serviceNetworksToDockerNetworks } from './utils';
|
||||
@ -27,6 +27,7 @@ import { serviceNetworksToDockerNetworks } from './utils';
|
||||
import log from '../lib/supervisor-console';
|
||||
import logMonitor from '../logging/monitor';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
import { getBootTime } from '../lib/fs-utils';
|
||||
|
||||
interface ServiceManagerEvents {
|
||||
change: void;
|
||||
@ -233,7 +234,7 @@ export async function remove(service: Service) {
|
||||
}
|
||||
}
|
||||
|
||||
async function create(service: Service) {
|
||||
async function create(service: Service): Promise<Service> {
|
||||
const mockContainerId = config.newUniqueKey();
|
||||
try {
|
||||
const existing = await get(service);
|
||||
@ -242,7 +243,7 @@ async function create(service: Service) {
|
||||
`No containerId provided for service ${service.serviceName} in ServiceManager.updateMetadata. Service: ${service}`,
|
||||
);
|
||||
}
|
||||
return docker.getContainer(existing.containerId);
|
||||
return existing;
|
||||
} catch (e: unknown) {
|
||||
if (!isNotFoundError(e)) {
|
||||
logger.logSystemEvent(LogTypes.installServiceError, {
|
||||
@ -287,7 +288,9 @@ async function create(service: Service) {
|
||||
reportNewStatus(mockContainerId, service, 'Installing');
|
||||
|
||||
const container = await docker.createContainer(conf);
|
||||
service.containerId = container.id;
|
||||
const inspectInfo = await container.inspect();
|
||||
|
||||
service = Service.fromDockerContainer(inspectInfo);
|
||||
|
||||
await Promise.all(
|
||||
_.map((nets || {}).EndpointsConfig, (endpointConfig, name) =>
|
||||
@ -299,7 +302,7 @@ async function create(service: Service) {
|
||||
);
|
||||
|
||||
logger.logSystemEvent(LogTypes.installServiceSuccess, { service });
|
||||
return container;
|
||||
return service;
|
||||
} finally {
|
||||
reportChange(mockContainerId);
|
||||
}
|
||||
@ -310,13 +313,25 @@ export async function start(service: Service) {
|
||||
let containerId: string | null = null;
|
||||
|
||||
try {
|
||||
const container = await create(service);
|
||||
const svc = await create(service);
|
||||
const container = docker.getContainer(svc.containerId!);
|
||||
|
||||
const requiresReboot =
|
||||
checkTruthy(
|
||||
service.config.labels?.['io.balena.update.requires-reboot'],
|
||||
) &&
|
||||
svc.createdAt != null &&
|
||||
svc.createdAt > getBootTime();
|
||||
|
||||
if (requiresReboot) {
|
||||
log.warn(`Skipping start of service ${svc.serviceName} until reboot`);
|
||||
}
|
||||
|
||||
// Exit here if the target state of the service
|
||||
// is set to running: false
|
||||
// is set to running: false or we are waiting for a reboot
|
||||
// QUESTION: should we split the service steps into
|
||||
// 'install' and 'start' instead of doing this?
|
||||
if (service.config.running === false) {
|
||||
if (service.config.running === false || requiresReboot) {
|
||||
return container;
|
||||
}
|
||||
|
||||
|
@ -128,7 +128,6 @@ class ServiceImpl implements Service {
|
||||
service.releaseId = parseInt(appConfig.releaseId, 10);
|
||||
service.serviceId = parseInt(appConfig.serviceId, 10);
|
||||
service.imageName = appConfig.image;
|
||||
service.createdAt = appConfig.createdAt;
|
||||
service.commit = appConfig.commit;
|
||||
service.appUuid = appConfig.appUuid;
|
||||
|
||||
|
@ -12,6 +12,8 @@ export interface UpdateState {
|
||||
hasLeftoverLocks: boolean;
|
||||
lock: Lock | null;
|
||||
force: boolean;
|
||||
rebootBreadcrumbSet: boolean;
|
||||
bootTime: Date;
|
||||
}
|
||||
|
||||
export interface App {
|
||||
|
@ -76,6 +76,7 @@ export interface CompositionStepArgs {
|
||||
appId: string | number;
|
||||
lock: Lock | null;
|
||||
};
|
||||
requireReboot: { serviceName: string };
|
||||
}
|
||||
|
||||
export type CompositionStepAction = keyof CompositionStepArgs;
|
||||
|
@ -90,7 +90,7 @@ export const fnSchema = {
|
||||
'deviceArch',
|
||||
'deviceType',
|
||||
'apiEndpoint',
|
||||
'apiTimeout',
|
||||
'apiRequestTimeout',
|
||||
'registered_at',
|
||||
'deviceId',
|
||||
'version',
|
||||
@ -107,7 +107,7 @@ export const fnSchema = {
|
||||
provisioningApiKey: conf.apiKey,
|
||||
deviceApiKey: conf.deviceApiKey,
|
||||
apiEndpoint: conf.apiEndpoint,
|
||||
apiTimeout: conf.apiTimeout,
|
||||
apiRequestTimeout: conf.apiRequestTimeout,
|
||||
registered_at: conf.registered_at,
|
||||
deviceId: conf.deviceId,
|
||||
supervisorVersion: conf.version,
|
||||
|
@ -12,6 +12,9 @@ export const schemaTypes = {
|
||||
type: t.string,
|
||||
default: '',
|
||||
},
|
||||
/**
|
||||
* The timeout for the supervisor's api
|
||||
*/
|
||||
apiTimeout: {
|
||||
type: PermissiveNumber,
|
||||
default: 15 * 60 * 1000,
|
||||
@ -118,6 +121,13 @@ export const schemaTypes = {
|
||||
type: PermissiveBoolean,
|
||||
default: false,
|
||||
},
|
||||
/**
|
||||
* The timeout for requests to the balenaCloud api
|
||||
*/
|
||||
apiRequestTimeout: {
|
||||
type: PermissiveNumber,
|
||||
default: 59000,
|
||||
},
|
||||
deltaRequestTimeout: {
|
||||
type: PermissiveNumber,
|
||||
default: 59000,
|
||||
@ -218,7 +228,7 @@ export const schemaTypes = {
|
||||
provisioningApiKey: t.union([t.string, NullOrUndefined]),
|
||||
deviceApiKey: t.string,
|
||||
apiEndpoint: t.string,
|
||||
apiTimeout: PermissiveNumber,
|
||||
apiRequestTimeout: PermissiveNumber,
|
||||
registered_at: t.union([PermissiveNumber, NullOrUndefined]),
|
||||
deviceId: t.union([PermissiveNumber, NullOrUndefined]),
|
||||
supervisorVersion: t.union([t.string, t.undefined]),
|
||||
|
@ -4,6 +4,9 @@ export const schema = {
|
||||
mutable: false,
|
||||
removeIfNull: false,
|
||||
},
|
||||
/**
|
||||
* The timeout for the supervisor's api
|
||||
*/
|
||||
apiTimeout: {
|
||||
source: 'config.json',
|
||||
mutable: false,
|
||||
@ -120,6 +123,11 @@ export const schema = {
|
||||
mutable: true,
|
||||
removeIfNull: false,
|
||||
},
|
||||
apiRequestTimeout: {
|
||||
source: 'db',
|
||||
mutable: true,
|
||||
removeIfNull: false,
|
||||
},
|
||||
delta: {
|
||||
source: 'db',
|
||||
mutable: true,
|
||||
|
@ -11,7 +11,6 @@ import { Volume } from '../compose/volume';
|
||||
import * as commitStore from '../compose/commit';
|
||||
import * as config from '../config';
|
||||
import * as db from '../db';
|
||||
import * as deviceConfig from '../device-config';
|
||||
import * as logger from '../logging';
|
||||
import * as images from '../compose/images';
|
||||
import * as volumeManager from '../compose/volume-manager';
|
||||
@ -512,7 +511,7 @@ router.get('/v2/device/tags', async (_req, res) => {
|
||||
});
|
||||
|
||||
router.get('/v2/device/vpn', async (_req, res) => {
|
||||
const conf = await deviceConfig.getCurrent();
|
||||
const conf = await deviceState.getCurrentConfig();
|
||||
// Build VPNInfo
|
||||
const info = {
|
||||
enabled: conf.SUPERVISOR_VPN_CONTROL === 'true',
|
||||
|
@ -1,34 +1,24 @@
|
||||
import _ from 'lodash';
|
||||
import { inspect } from 'util';
|
||||
import { promises as fs } from 'fs';
|
||||
|
||||
import * as config from './config';
|
||||
import * as db from './db';
|
||||
import * as logger from './logging';
|
||||
import * as dbus from './lib/dbus';
|
||||
import type { EnvVarObject } from './types';
|
||||
import { UnitNotLoadedError } from './lib/errors';
|
||||
import { checkInt, checkTruthy } from './lib/validation';
|
||||
import log from './lib/supervisor-console';
|
||||
import * as configUtils from './config/utils';
|
||||
import type { SchemaTypeKey } from './config/schema-type';
|
||||
import { matchesAnyBootConfig } from './config/backends';
|
||||
import type { ConfigBackend } from './config/backends/backend';
|
||||
import { Odmdata } from './config/backends/odmdata';
|
||||
import * as fsUtils from './lib/fs-utils';
|
||||
import { pathOnRoot } from './lib/host-utils';
|
||||
import * as config from '../config';
|
||||
import * as db from '../db';
|
||||
import * as logger from '../logging';
|
||||
import * as dbus from '../lib/dbus';
|
||||
import type { EnvVarObject } from '../types';
|
||||
import { UnitNotLoadedError } from '../lib/errors';
|
||||
import { checkInt, checkTruthy } from '../lib/validation';
|
||||
import log from '../lib/supervisor-console';
|
||||
import { setRebootBreadcrumb } from '../lib/reboot';
|
||||
|
||||
import * as configUtils from '../config/utils';
|
||||
import type { SchemaTypeKey } from '../config/schema-type';
|
||||
import { matchesAnyBootConfig } from '../config/backends';
|
||||
import type { ConfigBackend } from '../config/backends/backend';
|
||||
import { Odmdata } from '../config/backends/odmdata';
|
||||
|
||||
const vpnServiceName = 'openvpn';
|
||||
|
||||
// This indicates the file on the host /tmp directory that
|
||||
// marks the need for a reboot. Since reboot is only triggered for now
|
||||
// by some config changes, we leave this here for now. There is planned
|
||||
// functionality to allow image installs to require reboots, at that moment
|
||||
// this constant can be moved somewhere else
|
||||
const REBOOT_BREADCRUMB = pathOnRoot(
|
||||
'/tmp/balena-supervisor/reboot-after-apply',
|
||||
);
|
||||
|
||||
interface ConfigOption {
|
||||
envVarName: string;
|
||||
varType: string;
|
||||
@ -39,10 +29,7 @@ interface ConfigOption {
|
||||
// FIXME: Bring this and the deviceState and
|
||||
// applicationState steps together
|
||||
export interface ConfigStep {
|
||||
// TODO: This is a bit of a mess, the DeviceConfig class shouldn't
|
||||
// know that the reboot action exists as it is implemented by
|
||||
// DeviceState. Fix this weird circular dependency
|
||||
action: keyof DeviceActionExecutors | 'reboot' | 'noop';
|
||||
action: keyof DeviceActionExecutors | 'noop';
|
||||
humanReadableTarget?: Dictionary<string>;
|
||||
target?: string | Dictionary<string>;
|
||||
}
|
||||
@ -117,10 +104,12 @@ const actionExecutors: DeviceActionExecutors = {
|
||||
await setBootConfig(backend, step.target as Dictionary<string>);
|
||||
}
|
||||
},
|
||||
setRebootBreadcrumb: async () => {
|
||||
// Just create the file. The last step in the target state calculation will check
|
||||
// the file and create a reboot step
|
||||
await fsUtils.touch(REBOOT_BREADCRUMB);
|
||||
setRebootBreadcrumb: async (step) => {
|
||||
const changes =
|
||||
step != null && step.target != null && typeof step.target === 'object'
|
||||
? step.target
|
||||
: {};
|
||||
return setRebootBreadcrumb(changes);
|
||||
},
|
||||
};
|
||||
|
||||
@ -152,6 +141,11 @@ const configKeys: Dictionary<ConfigOption> = {
|
||||
varType: 'bool',
|
||||
defaultValue: 'true',
|
||||
},
|
||||
apiRequestTimeout: {
|
||||
envVarName: 'SUPERVISOR_API_REQUEST_TIMEOUT',
|
||||
varType: 'int',
|
||||
defaultValue: '59000',
|
||||
},
|
||||
delta: {
|
||||
envVarName: 'SUPERVISOR_DELTA',
|
||||
varType: 'bool',
|
||||
@ -210,7 +204,7 @@ const configKeys: Dictionary<ConfigOption> = {
|
||||
},
|
||||
};
|
||||
|
||||
export const validKeys = [
|
||||
const validKeys = [
|
||||
'SUPERVISOR_VPN_CONTROL',
|
||||
'OVERRIDE_LOCK',
|
||||
..._.map(configKeys, 'envVarName'),
|
||||
@ -413,6 +407,7 @@ function getConfigSteps(
|
||||
target: Dictionary<string>,
|
||||
) {
|
||||
const configChanges: Dictionary<string> = {};
|
||||
const rebootingChanges: Dictionary<string> = {};
|
||||
const humanReadableConfigChanges: Dictionary<string> = {};
|
||||
let reboot = false;
|
||||
const steps: ConfigStep[] = [];
|
||||
@ -448,6 +443,9 @@ function getConfigSteps(
|
||||
}
|
||||
if (changingValue != null) {
|
||||
configChanges[key] = changingValue;
|
||||
if ($rebootRequired) {
|
||||
rebootingChanges[key] = changingValue;
|
||||
}
|
||||
humanReadableConfigChanges[envVarName] = changingValue;
|
||||
reboot = $rebootRequired || reboot;
|
||||
}
|
||||
@ -457,7 +455,7 @@ function getConfigSteps(
|
||||
|
||||
if (!_.isEmpty(configChanges)) {
|
||||
if (reboot) {
|
||||
steps.push({ action: 'setRebootBreadcrumb' });
|
||||
steps.push({ action: 'setRebootBreadcrumb', target: rebootingChanges });
|
||||
}
|
||||
|
||||
steps.push({
|
||||
@ -544,24 +542,16 @@ async function getBackendSteps(
|
||||
return [
|
||||
// All backend steps require a reboot except fan control
|
||||
...(steps.length > 0 && rebootRequired
|
||||
? [{ action: 'setRebootBreadcrumb' } as ConfigStep]
|
||||
? [
|
||||
{
|
||||
action: 'setRebootBreadcrumb',
|
||||
} as ConfigStep,
|
||||
]
|
||||
: []),
|
||||
...steps,
|
||||
];
|
||||
}
|
||||
|
||||
async function isRebootRequired() {
|
||||
const hasBreadcrumb = await fsUtils.exists(REBOOT_BREADCRUMB);
|
||||
if (hasBreadcrumb) {
|
||||
const stats = await fs.stat(REBOOT_BREADCRUMB);
|
||||
|
||||
// If the breadcrumb exists and the last modified time is greater than the
|
||||
// boot time, that means we need to reboot
|
||||
return stats.mtime.getTime() > fsUtils.getBootTime().getTime();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function getRequiredSteps(
|
||||
currentState: { local?: { config?: EnvVarObject } },
|
||||
targetState: { local?: { config: EnvVarObject } },
|
||||
@ -584,19 +574,6 @@ export async function getRequiredSteps(
|
||||
: await getBackendSteps(current, target)),
|
||||
];
|
||||
|
||||
// Check if there is either no steps, or they are all
|
||||
// noops, and we need to reboot. We want to do this
|
||||
// because in a preloaded setting with no internet
|
||||
// connection, the device will try to start containers
|
||||
// before any boot config has been applied, which can
|
||||
// cause problems
|
||||
const rebootRequired = await isRebootRequired();
|
||||
if (_.every(steps, { action: 'noop' }) && rebootRequired) {
|
||||
steps.push({
|
||||
action: 'reboot',
|
||||
});
|
||||
}
|
||||
|
||||
return steps;
|
||||
}
|
||||
|
||||
@ -642,7 +619,7 @@ export function executeStepAction(
|
||||
step: ConfigStep,
|
||||
opts: DeviceActionExecutorOpts,
|
||||
) {
|
||||
if (step.action !== 'reboot' && step.action !== 'noop') {
|
||||
if (step.action !== 'noop') {
|
||||
return actionExecutors[step.action](step, opts);
|
||||
}
|
||||
}
|
@ -9,7 +9,7 @@ import * as config from '../config';
|
||||
import * as logger from '../logging';
|
||||
|
||||
import * as network from '../network';
|
||||
import * as deviceConfig from '../device-config';
|
||||
import * as deviceConfig from './device-config';
|
||||
|
||||
import * as constants from '../lib/constants';
|
||||
import * as dbus from '../lib/dbus';
|
||||
@ -19,6 +19,7 @@ import * as updateLock from '../lib/update-lock';
|
||||
import { getGlobalApiKey } from '../lib/api-keys';
|
||||
import * as sysInfo from '../lib/system-info';
|
||||
import { log } from '../lib/supervisor-console';
|
||||
import { isRebootRequired } from '../lib/reboot';
|
||||
import { loadTargetFromFile } from './preload';
|
||||
import * as applicationManager from '../compose/application-manager';
|
||||
import * as commitStore from '../compose/commit';
|
||||
@ -26,6 +27,12 @@ import type { InstancedDeviceState } from './target-state';
|
||||
import * as TargetState from './target-state';
|
||||
export { getTarget, setTarget } from './target-state';
|
||||
|
||||
export {
|
||||
formatConfigKeys,
|
||||
getCurrent as getCurrentConfig,
|
||||
getDefaults as getDefaultConfig,
|
||||
} from './device-config';
|
||||
|
||||
import type { DeviceLegacyState, DeviceState, DeviceReport } from '../types';
|
||||
import type {
|
||||
CompositionStepT,
|
||||
@ -512,7 +519,7 @@ export async function executeStepAction(
|
||||
}
|
||||
}
|
||||
|
||||
export async function applyStep(
|
||||
async function applyStep(
|
||||
step: DeviceStateStep<PossibleStepTargets>,
|
||||
{
|
||||
force,
|
||||
@ -609,11 +616,12 @@ export const applyTarget = async ({
|
||||
({ action }) => action === 'noop',
|
||||
);
|
||||
|
||||
let backoff: boolean;
|
||||
const rebootRequired = await isRebootRequired();
|
||||
|
||||
let backoff = false;
|
||||
let steps: Array<DeviceStateStep<PossibleStepTargets>>;
|
||||
|
||||
if (!noConfigSteps) {
|
||||
backoff = false;
|
||||
steps = deviceConfigSteps;
|
||||
} else {
|
||||
const appSteps = await applicationManager.getRequiredSteps(
|
||||
@ -640,6 +648,21 @@ export const applyTarget = async ({
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there are either no steps, or they are all
|
||||
// noops, and we need to reboot. We want to do this
|
||||
// because in a preloaded setting with no internet
|
||||
// connection, the device will try to start containers
|
||||
// before any boot config has been applied, which can
|
||||
// cause problems
|
||||
// For application manager, the reboot breadcrumb should
|
||||
// be set after all downloads are ready and target containers
|
||||
// have been installed
|
||||
if (steps.every(({ action }) => action === 'noop') && rebootRequired) {
|
||||
steps.push({
|
||||
action: 'reboot',
|
||||
});
|
||||
}
|
||||
|
||||
if (_.isEmpty(steps)) {
|
||||
emitAsync('apply-target-state-end', null);
|
||||
if (!intermediate) {
|
||||
|
@ -6,9 +6,9 @@ import { imageFromService } from '../compose/images';
|
||||
import { NumericIdentifier } from '../types';
|
||||
import { setTarget } from './target-state';
|
||||
import * as config from '../config';
|
||||
import * as deviceConfig from '../device-config';
|
||||
import * as eventTracker from '../event-tracker';
|
||||
import * as imageManager from '../compose/images';
|
||||
import * as deviceState from '../device-state';
|
||||
|
||||
import {
|
||||
AppsJsonParseError,
|
||||
@ -126,8 +126,8 @@ export async function loadTargetFromFile(appsPath: string): Promise<boolean> {
|
||||
await imageManager.save(image);
|
||||
}
|
||||
|
||||
const deviceConf = await deviceConfig.getCurrent();
|
||||
const formattedConf = deviceConfig.formatConfigKeys(preloadState.config);
|
||||
const deviceConf = await deviceState.getCurrentConfig();
|
||||
const formattedConf = deviceState.formatConfigKeys(preloadState.config);
|
||||
const localState = {
|
||||
[uuid]: {
|
||||
name: '',
|
||||
|
@ -6,7 +6,7 @@ import * as config from '../config';
|
||||
import * as db from '../db';
|
||||
|
||||
import * as globalEventBus from '../event-bus';
|
||||
import * as deviceConfig from '../device-config';
|
||||
import * as deviceConfig from './device-config';
|
||||
|
||||
import { TargetStateError } from '../lib/errors';
|
||||
import { takeGlobalLockRO, takeGlobalLockRW } from '../lib/process-lock';
|
||||
|
@ -111,10 +111,10 @@ export const exchangeKeyAndGetDevice = async (
|
||||
opts: Partial<KeyExchangeOpts>,
|
||||
): Promise<Device> => {
|
||||
const uuid = opts.uuid;
|
||||
const apiTimeout = opts.apiTimeout;
|
||||
if (!(uuid && apiTimeout)) {
|
||||
const apiRequestTimeout = opts.apiRequestTimeout;
|
||||
if (!(uuid && apiRequestTimeout)) {
|
||||
throw new InternalInconsistencyError(
|
||||
'UUID and apiTimeout should be defined in exchangeKeyAndGetDevice',
|
||||
'UUID and apiRequestTimeout should be defined in exchangeKeyAndGetDevice',
|
||||
);
|
||||
}
|
||||
|
||||
@ -122,7 +122,12 @@ export const exchangeKeyAndGetDevice = async (
|
||||
// valid, because if it is then we can just use that
|
||||
if (opts.deviceApiKey != null) {
|
||||
try {
|
||||
return await fetchDevice(balenaApi, uuid, opts.deviceApiKey, apiTimeout);
|
||||
return await fetchDevice(
|
||||
balenaApi,
|
||||
uuid,
|
||||
opts.deviceApiKey,
|
||||
apiRequestTimeout,
|
||||
);
|
||||
} catch (e) {
|
||||
if (e instanceof DeviceNotFoundError) {
|
||||
// do nothing...
|
||||
@ -146,7 +151,7 @@ export const exchangeKeyAndGetDevice = async (
|
||||
balenaApi,
|
||||
uuid,
|
||||
opts.provisioningApiKey,
|
||||
apiTimeout,
|
||||
apiRequestTimeout,
|
||||
);
|
||||
} catch {
|
||||
throw new ExchangeKeyError(`Couldn't fetch device with provisioning key`);
|
||||
@ -165,7 +170,7 @@ export const exchangeKeyAndGetDevice = async (
|
||||
Authorization: `Bearer ${opts.provisioningApiKey}`,
|
||||
},
|
||||
})
|
||||
.timeout(apiTimeout);
|
||||
.timeout(apiRequestTimeout);
|
||||
|
||||
if (res.statusCode !== 200) {
|
||||
throw new ExchangeKeyError(
|
||||
@ -220,7 +225,7 @@ export const provision = async (
|
||||
osVariant: opts.osVariant,
|
||||
macAddress: opts.macAddress,
|
||||
}),
|
||||
).timeout(opts.apiTimeout);
|
||||
).timeout(opts.apiRequestTimeout);
|
||||
} catch (err) {
|
||||
if (
|
||||
err instanceof deviceRegister.ApiError &&
|
||||
|
@ -128,7 +128,7 @@ export function containerContractsFulfilled(
|
||||
].map((c) => new Contract(c)),
|
||||
);
|
||||
|
||||
const solution = blueprint.reproduce(universe);
|
||||
const solution = [...blueprint.reproduce(universe)];
|
||||
|
||||
if (solution.length > 1) {
|
||||
throw new InternalInconsistencyError(
|
||||
|
@ -1,22 +1,23 @@
|
||||
import type { ProgressCallback } from 'docker-progress';
|
||||
import { DockerProgress } from 'docker-progress';
|
||||
import type { ProgressCallback } from 'docker-progress';
|
||||
import Dockerode from 'dockerode';
|
||||
import _ from 'lodash';
|
||||
import memoizee from 'memoizee';
|
||||
|
||||
import { applyDelta, OutOfSyncError } from 'docker-delta';
|
||||
|
||||
import type { SchemaReturn } from '../config/schema-type';
|
||||
import log from './supervisor-console';
|
||||
import { envArrayToObject } from './conversions';
|
||||
import * as request from './request';
|
||||
import {
|
||||
DeltaStillProcessingError,
|
||||
ImageAuthenticationError,
|
||||
InvalidNetGatewayError,
|
||||
DeltaServerError,
|
||||
DeltaApplyError,
|
||||
isStatusError,
|
||||
} from './errors';
|
||||
import * as request from './request';
|
||||
import type { EnvVarObject } from '../types';
|
||||
|
||||
import log from './supervisor-console';
|
||||
import type { SchemaReturn } from '../config/schema-type';
|
||||
|
||||
export type FetchOptions = SchemaReturn<'fetchOptions'>;
|
||||
export type DeltaFetchOptions = FetchOptions & {
|
||||
@ -41,6 +42,18 @@ type ImageNameParts = {
|
||||
// (10 mins)
|
||||
const DELTA_TOKEN_TIMEOUT = 10 * 60 * 1000;
|
||||
|
||||
// How many times to retry a v3 delta apply before falling back to a regular pull.
|
||||
// A delta is applied to the base image when pulling, so a failure could be due to
|
||||
// "layers from manifest don't match image configuration", which can occur before
|
||||
// or after downloading delta image layers.
|
||||
//
|
||||
// Other causes of failure have not been documented as clearly as "layers from manifest"
|
||||
// but could manifest as well, though unclear if they occur before, after, or during
|
||||
// downloading delta image layers.
|
||||
//
|
||||
// See: https://github.com/balena-os/balena-engine/blob/master/distribution/pull_v2.go#L43
|
||||
const DELTA_APPLY_RETRY_COUNT = 3;
|
||||
|
||||
export const docker = new Dockerode();
|
||||
export const dockerProgress = new DockerProgress({
|
||||
docker,
|
||||
@ -113,11 +126,7 @@ export async function fetchDeltaWithProgress(
|
||||
onProgress: ProgressCallback,
|
||||
serviceName: string,
|
||||
): Promise<string> {
|
||||
const deltaSourceId =
|
||||
deltaOpts.deltaSourceId != null
|
||||
? deltaOpts.deltaSourceId
|
||||
: deltaOpts.deltaSource;
|
||||
|
||||
const deltaSourceId = deltaOpts.deltaSourceId ?? deltaOpts.deltaSource;
|
||||
const timeout = deltaOpts.deltaApplyTimeout;
|
||||
|
||||
const logFn = (str: string) =>
|
||||
@ -143,7 +152,7 @@ export async function fetchDeltaWithProgress(
|
||||
}
|
||||
|
||||
// Since the supervisor never calls this function with a source anymore,
|
||||
// this should never happen, but w ehandle it anyway
|
||||
// this should never happen, but we handle it anyway
|
||||
if (deltaOpts.deltaSource == null) {
|
||||
logFn('Falling back to regular pull due to lack of a delta source');
|
||||
return fetchImageWithProgress(imgDest, deltaOpts, onProgress);
|
||||
@ -210,6 +219,18 @@ export async function fetchDeltaWithProgress(
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
// If 400s status code, throw a more specific error & revert immediately to a regular pull,
|
||||
// unless the code is 401 Unauthorized, in which case we should surface the error by retrying
|
||||
// the delta server request, instead of falling back to a regular pull immediately.
|
||||
if (res.statusCode >= 400 && res.statusCode < 500) {
|
||||
if (res.statusCode === 401) {
|
||||
throw new Error(
|
||||
`Got ${res.statusCode} when requesting an image from delta server: ${res.statusMessage}`,
|
||||
);
|
||||
} else {
|
||||
throw new DeltaServerError(res.statusCode, res.statusMessage);
|
||||
}
|
||||
}
|
||||
if (res.statusCode !== 200) {
|
||||
throw new Error(
|
||||
`Got ${res.statusCode} when requesting v3 delta from delta server.`,
|
||||
@ -225,24 +246,62 @@ export async function fetchDeltaWithProgress(
|
||||
`Got an error when parsing delta server response for v3 delta: ${e}`,
|
||||
);
|
||||
}
|
||||
id = await applyBalenaDelta(name, token, onProgress, logFn);
|
||||
// Try to apply delta DELTA_APPLY_RETRY_COUNT times, then throw DeltaApplyError
|
||||
let lastError: Error | undefined = undefined;
|
||||
for (
|
||||
let tryCount = 0;
|
||||
tryCount < DELTA_APPLY_RETRY_COUNT;
|
||||
tryCount++
|
||||
) {
|
||||
try {
|
||||
id = await applyBalenaDelta(name, token, onProgress, logFn);
|
||||
break;
|
||||
} catch (e) {
|
||||
if (isStatusError(e)) {
|
||||
// A status error during delta pull indicates network issues,
|
||||
// so we should throw an error to the handler that indicates that
|
||||
// the delta pull should be retried until network issues are resolved,
|
||||
// rather than falling back to a regular pull.
|
||||
throw e;
|
||||
}
|
||||
lastError = e as Error;
|
||||
logFn(
|
||||
`Delta apply failed, retrying (${tryCount + 1}/${DELTA_APPLY_RETRY_COUNT})...`,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (lastError) {
|
||||
throw new DeltaApplyError(lastError.message);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported delta version: ${deltaOpts.deltaVersion}`);
|
||||
}
|
||||
} catch (e) {
|
||||
// Log appropriate message based on error type
|
||||
if (e instanceof OutOfSyncError) {
|
||||
logFn('Falling back to regular pull due to delta out of sync error');
|
||||
return await fetchImageWithProgress(imgDest, deltaOpts, onProgress);
|
||||
} else if (e instanceof DeltaServerError) {
|
||||
logFn(
|
||||
`Falling back to regular pull due to delta server error (${e.statusCode})${e.statusMessage ? `: ${e.statusMessage}` : ''}`,
|
||||
);
|
||||
} else if (e instanceof DeltaApplyError) {
|
||||
// A delta apply error is raised from the Engine and doesn't have a status code
|
||||
logFn(
|
||||
`Falling back to regular pull due to delta apply error ${e.message ? `: ${e.message}` : ''}`,
|
||||
);
|
||||
} else {
|
||||
logFn(`Delta failed with ${e}`);
|
||||
throw e;
|
||||
}
|
||||
|
||||
// For handled errors, fall back to regular pull
|
||||
return fetchImageWithProgress(imgDest, deltaOpts, onProgress);
|
||||
}
|
||||
|
||||
logFn(`Delta applied successfully`);
|
||||
return id;
|
||||
return id!;
|
||||
}
|
||||
|
||||
export async function fetchImageWithProgress(
|
||||
|
@ -70,6 +70,13 @@ export class InvalidNetGatewayError extends TypedError {}
|
||||
|
||||
export class DeltaStillProcessingError extends TypedError {}
|
||||
|
||||
// Status-carrying error for failed delta server requests. In the delta fetch
// code visible alongside this change it is thrown for 4xx responses other than
// 401, and the handler reacts by logging and falling back to a regular pull.
export class DeltaServerError extends StatusError {}
|
||||
/**
 * Error raised when applying a delta keeps failing after the configured
 * number of retries. It carries only a message (no status code); the delta
 * fetch handler matches on it with `instanceof` to fall back to a regular pull.
 */
export class DeltaApplyError extends Error {
	/**
	 * @param message - description of the last failure seen while applying the delta
	 */
	constructor(message?: string) {
		super(message);
		// Without an explicit name, string conversion and stack traces report a
		// generic "Error", which hides the error type in logs.
		this.name = 'DeltaApplyError';
	}
}
|
||||
|
||||
export class UpdatesLockedError extends TypedError {}
|
||||
|
||||
export function isHttpConflictError(err: { statusCode: number }): boolean {
|
||||
|
@ -87,5 +87,4 @@ export const touch = (file: string, time = new Date()) =>
|
||||
);
|
||||
|
||||
// Get the system boot time as a Date object
|
||||
export const getBootTime = () =>
|
||||
new Date(new Date().getTime() - uptime() * 1000);
|
||||
export const getBootTime = () => new Date(Date.now() - uptime() * 1000);
|
||||
|
40
src/lib/reboot.ts
Normal file
40
src/lib/reboot.ts
Normal file
@ -0,0 +1,40 @@
|
||||
import { pathOnRoot } from '../lib/host-utils';
|
||||
import * as fsUtils from '../lib/fs-utils';
|
||||
import { promises as fs } from 'fs';
|
||||
import * as logger from '../logging';
|
||||
|
||||
// This indicates the file on the host /tmp directory that
|
||||
// marks the need for a reboot. Since reboot is only triggered for now
|
||||
// by some config changes, we leave this here for now. There is planned
|
||||
// functionality to allow image installs to require reboots, at that moment
|
||||
// this constant can be moved somewhere else
|
||||
const REBOOT_BREADCRUMB = pathOnRoot(
|
||||
'/tmp/balena-supervisor/reboot-after-apply',
|
||||
);
|
||||
|
||||
/**
 * Marks that a reboot is needed by touching the breadcrumb file, then logs a
 * system message that includes the JSON-serialized `source`.
 *
 * @param source - details of what requested the reboot; only used in the log message
 */
export async function setRebootBreadcrumb(source: Dictionary<any> = {}) {
	// Creating the file is all that is needed here: the last step in the
	// target state calculation checks for it and creates the reboot step.
	await fsUtils.touch(REBOOT_BREADCRUMB);

	const message = `Reboot has been scheduled to apply changes: ${JSON.stringify(source)}`;
	logger.logSystemMessage(message, {}, 'Reboot scheduled');
}
|
||||
|
||||
/**
 * Reports whether the reboot breadcrumb file currently exists,
 * as determined by `fsUtils.exists`.
 */
export async function isRebootBreadcrumbSet() {
	const breadcrumbPresent = await fsUtils.exists(REBOOT_BREADCRUMB);
	return breadcrumbPresent;
}
|
||||
|
||||
/**
 * Reports whether a reboot is still pending.
 *
 * @returns `false` when the breadcrumb file does not exist; otherwise `true`
 * only if the breadcrumb's modification time is later than the boot time
 * reported by `fsUtils.getBootTime()`.
 */
export async function isRebootRequired() {
	if (!(await fsUtils.exists(REBOOT_BREADCRUMB))) {
		return false;
	}

	// A breadcrumb modified after the boot time means the reboot it asked
	// for has not happened yet.
	const { mtime } = await fs.stat(REBOOT_BREADCRUMB);
	return mtime.getTime() > fsUtils.getBootTime().getTime();
}
|
@ -1,6 +1,7 @@
|
||||
import _ from 'lodash';
|
||||
import { promises as fs, watch } from 'fs';
|
||||
import networkCheck from 'network-checker';
|
||||
import { checkHost as checkNetHost, monitor } from 'network-checker';
|
||||
import type { ConnectOptions, MonitorChangeFunction } from 'network-checker';
|
||||
import os from 'os';
|
||||
import url from 'url';
|
||||
|
||||
@ -20,21 +21,16 @@ const networkPattern = {
|
||||
let isConnectivityCheckPaused = false;
|
||||
let isConnectivityCheckEnabled = true;
|
||||
|
||||
function checkHost(
|
||||
opts: networkCheck.ConnectOptions,
|
||||
): boolean | PromiseLike<boolean> {
|
||||
async function checkHost(opts: ConnectOptions): Promise<boolean> {
|
||||
return (
|
||||
!isConnectivityCheckEnabled ||
|
||||
isConnectivityCheckPaused ||
|
||||
networkCheck.checkHost(opts)
|
||||
(await checkNetHost(opts))
|
||||
);
|
||||
}
|
||||
|
||||
function customMonitor(
|
||||
options: networkCheck.ConnectOptions,
|
||||
fn: networkCheck.MonitorChangeFunction,
|
||||
) {
|
||||
return networkCheck.monitor(checkHost, options, fn);
|
||||
function customMonitor(options: ConnectOptions, fn: MonitorChangeFunction) {
|
||||
return monitor(checkHost, options, fn);
|
||||
}
|
||||
|
||||
export function enableCheck(enable: boolean) {
|
||||
@ -60,7 +56,7 @@ export const startConnectivityCheck = _.once(
|
||||
async (
|
||||
apiEndpoint: string,
|
||||
enable: boolean,
|
||||
onChangeCallback?: networkCheck.MonitorChangeFunction,
|
||||
onChangeCallback?: MonitorChangeFunction,
|
||||
) => {
|
||||
enableConnectivityCheck(enable);
|
||||
if (!apiEndpoint) {
|
||||
|
@ -821,9 +821,9 @@ describe('compose/application-manager', () => {
|
||||
containerIdsByAppId,
|
||||
},
|
||||
);
|
||||
expectSteps('noop', steps2, 1);
|
||||
|
||||
// No other steps
|
||||
expect(steps2).to.have.length(1);
|
||||
expect(steps2.every((s) => s.action === 'noop'));
|
||||
|
||||
/**
|
||||
* Only start target services after both images downloaded
|
||||
@ -932,7 +932,7 @@ describe('compose/application-manager', () => {
|
||||
);
|
||||
|
||||
// Only noop steps should be seen at this point
|
||||
expect(steps.filter((s) => s.action !== 'noop')).to.have.lengthOf(0);
|
||||
expect(steps.every((s) => s.action === 'noop'));
|
||||
});
|
||||
|
||||
it('infers to kill several services as long as there is no unmet dependency', async () => {
|
||||
@ -1099,7 +1099,7 @@ describe('compose/application-manager', () => {
|
||||
.that.deep.includes({ serviceName: 'dep' });
|
||||
|
||||
// No more steps until the first container has been started
|
||||
expect(nextSteps).to.have.lengthOf(0);
|
||||
expect(nextSteps.every((s) => s.action === 'noop'));
|
||||
});
|
||||
|
||||
it('infers to start a service once its dependency has been met', async () => {
|
||||
|
@ -5,7 +5,7 @@ import type { SinonStub, SinonSpy } from 'sinon';
|
||||
import { stub, spy } from 'sinon';
|
||||
import { expect } from 'chai';
|
||||
|
||||
import * as deviceConfig from '~/src/device-config';
|
||||
import * as deviceConfig from '~/src/device-state/device-config';
|
||||
import * as fsUtils from '~/lib/fs-utils';
|
||||
import * as logger from '~/src/logging';
|
||||
import { Extlinux } from '~/src/config/backends/extlinux';
|
||||
@ -84,6 +84,7 @@ describe('device-config', () => {
|
||||
SUPERVISOR_LOCAL_MODE: 'false',
|
||||
SUPERVISOR_CONNECTIVITY_CHECK: 'true',
|
||||
SUPERVISOR_LOG_CONTROL: 'true',
|
||||
SUPERVISOR_API_REQUEST_TIMEOUT: '59000',
|
||||
SUPERVISOR_DELTA: 'false',
|
||||
SUPERVISOR_DELTA_REQUEST_TIMEOUT: '59000',
|
||||
SUPERVISOR_DELTA_APPLY_TIMEOUT: '0',
|
||||
|
@ -335,7 +335,7 @@ describe('ApiBinder', () => {
|
||||
|
||||
before(async () => {
|
||||
await initModels(components, '/config-apibinder.json');
|
||||
previousLastFetch = TargetState.lastFetch;
|
||||
previousLastFetch = TargetState.lastSuccessfulFetch;
|
||||
});
|
||||
|
||||
after(async () => {
|
||||
|
@ -8,7 +8,7 @@ import { expect } from 'chai';
|
||||
import * as TargetState from '~/src/api-binder/poll';
|
||||
import Log from '~/lib/supervisor-console';
|
||||
import * as request from '~/lib/request';
|
||||
import * as deviceConfig from '~/src/device-config';
|
||||
import * as deviceConfig from '~/src/device-state/device-config';
|
||||
import { UpdatesLockedError } from '~/lib/errors';
|
||||
import { setTimeout } from 'timers/promises';
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
import chai from 'chai';
|
||||
import chaiAsPromised from 'chai-as-promised';
|
||||
import sinonChai from 'sinon-chai';
|
||||
import chaiThings from 'chai-things';
|
||||
import chaiLike from 'chai-like';
|
||||
@ -14,9 +13,11 @@ import chaiLike from 'chai-like';
|
||||
* If unsure whether to add to global fixtures, refer to the chart above.
|
||||
* Also, avoid setting global mutable variables here.
|
||||
*/
|
||||
export const mochaGlobalSetup = function () {
|
||||
export const mochaGlobalSetup = async function () {
|
||||
console.log('Setting up global fixtures for tests...');
|
||||
|
||||
const { default: chaiAsPromised } = await import('chai-as-promised');
|
||||
|
||||
/* Setup chai assertion plugins */
|
||||
chai.use(chaiAsPromised);
|
||||
chai.use(sinonChai);
|
||||
|
@ -21,6 +21,8 @@ const defaultContext = {
|
||||
downloading: [] as string[],
|
||||
lock: null,
|
||||
hasLeftoverLocks: false,
|
||||
rebootBreadcrumbSet: false,
|
||||
bootTime: new Date(Date.now() - 30 * 60 * 1000), // 30 minutes ago
|
||||
};
|
||||
|
||||
const mockLock: Lock = {
|
||||
@ -348,7 +350,6 @@ describe('compose/app', () => {
|
||||
target,
|
||||
);
|
||||
|
||||
expect(recreateVolumeSteps).to.have.length(1);
|
||||
expectSteps('createVolume', recreateVolumeSteps);
|
||||
|
||||
// Step 5: takeLock
|
||||
@ -1294,22 +1295,23 @@ describe('compose/app', () => {
|
||||
.to.deep.include({ serviceName: 'main' });
|
||||
});
|
||||
|
||||
it('should not try to start a container which has exited and has restart policy of no', async () => {
|
||||
it('should not try to start a container which has exited', async () => {
|
||||
// Container is a "run once" type of service so it has exited.
|
||||
const current = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{ composition: { restart: 'no' }, running: false },
|
||||
{ composition: { restart: 'yes' }, running: false },
|
||||
{ state: { containerId: 'run_once' } },
|
||||
),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
|
||||
// Now test that another start step is not added on this service
|
||||
const target = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{ composition: { restart: 'no' }, running: false },
|
||||
{ composition: { restart: 'always' }, running: false },
|
||||
{ state: { containerId: 'run_once' } },
|
||||
),
|
||||
],
|
||||
@ -1317,6 +1319,7 @@ describe('compose/app', () => {
|
||||
});
|
||||
|
||||
const steps = current.nextStepsForAppUpdate(defaultContext, target);
|
||||
expect(steps.length).to.equal(0);
|
||||
expectNoStep('start', steps);
|
||||
});
|
||||
|
||||
@ -1472,6 +1475,83 @@ describe('compose/app', () => {
|
||||
.that.deep.includes({ serviceName: 'main' });
|
||||
});
|
||||
|
||||
it('should not start a container when it depends on a service that is not running', async () => {
|
||||
const current = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{
|
||||
running: false,
|
||||
appId: 1,
|
||||
serviceName: 'dep',
|
||||
},
|
||||
{
|
||||
state: {
|
||||
containerId: 'dep-id',
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
const target = createApp({
|
||||
services: [
|
||||
await createService({
|
||||
appId: 1,
|
||||
serviceName: 'main',
|
||||
composition: {
|
||||
depends_on: ['dep'],
|
||||
},
|
||||
}),
|
||||
await createService({
|
||||
appId: 1,
|
||||
serviceName: 'dep',
|
||||
}),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
isTarget: true,
|
||||
});
|
||||
|
||||
const availableImages = [
|
||||
createImage({ appId: 1, serviceName: 'main', name: 'main-image' }),
|
||||
createImage({ appId: 1, serviceName: 'dep', name: 'dep-image' }),
|
||||
];
|
||||
// As service is already being installed, lock for target should have been taken
|
||||
const contextWithImages = {
|
||||
...defaultContext,
|
||||
...{ availableImages },
|
||||
lock: mockLock,
|
||||
};
|
||||
|
||||
// Only one start step and it should be that of the 'dep' service
|
||||
const stepsToIntermediate = current.nextStepsForAppUpdate(
|
||||
contextWithImages,
|
||||
target,
|
||||
);
|
||||
expectNoStep('start', stepsToIntermediate);
|
||||
expectSteps('noop', stepsToIntermediate);
|
||||
|
||||
// we now make our current state have the 'dep' service as started...
|
||||
const intermediate = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{ appId: 1, serviceName: 'dep' },
|
||||
{ state: { containerId: 'dep-id' } },
|
||||
),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
|
||||
// we should now see a start for the 'main' service...
|
||||
const stepsToTarget = intermediate.nextStepsForAppUpdate(
|
||||
{ ...contextWithImages, ...{ containerIds: { dep: 'dep-id' } } },
|
||||
target,
|
||||
);
|
||||
const [startMainStep] = expectSteps('start', stepsToTarget);
|
||||
expect(startMainStep)
|
||||
.to.have.property('target')
|
||||
.that.deep.includes({ serviceName: 'main' });
|
||||
});
|
||||
|
||||
it('should not create a start step when all that changes is a running state', async () => {
|
||||
const contextWithImages = {
|
||||
...defaultContext,
|
||||
@ -1993,7 +2073,7 @@ describe('compose/app', () => {
|
||||
target,
|
||||
);
|
||||
expectNoStep('start', steps);
|
||||
expectSteps('noop', steps, 1);
|
||||
expectSteps('noop', steps, 1, Infinity);
|
||||
|
||||
// Take lock before starting once downloads complete
|
||||
const steps2 = current.nextStepsForAppUpdate(
|
||||
@ -2033,6 +2113,128 @@ describe('compose/app', () => {
|
||||
);
|
||||
expectSteps('start', steps3, 2);
|
||||
});
|
||||
|
||||
it('should set the reboot breadcrumb after a service with `requires-reboot` has been installed', async () => {
|
||||
// Container is a "run once" type of service so it has exited.
|
||||
const current = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{
|
||||
labels: { 'io.balena.update.requires-reboot': 'true' },
|
||||
running: false,
|
||||
},
|
||||
{ state: { createdAt: new Date(), status: 'Installed' } },
|
||||
),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
|
||||
// Now test that another start step is not added on this service
|
||||
const target = createApp({
|
||||
services: [
|
||||
await createService({
|
||||
labels: { 'io.balena.update.requires-reboot': 'true' },
|
||||
running: true,
|
||||
}),
|
||||
],
|
||||
isTarget: true,
|
||||
});
|
||||
|
||||
const steps = current.nextStepsForAppUpdate(
|
||||
{
|
||||
...defaultContext,
|
||||
rebootBreadcrumbSet: false,
|
||||
// 30 minutes ago
|
||||
bootTime: new Date(Date.now() - 30 * 60 * 1000),
|
||||
},
|
||||
target,
|
||||
);
|
||||
expect(steps.length).to.equal(1);
|
||||
expectSteps('requireReboot', steps);
|
||||
});
|
||||
|
||||
it('should not try to start a container with `requires-reboot` if the reboot has not taken place yet', async () => {
|
||||
// Container is a "run once" type of service so it has exited.
|
||||
const current = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{
|
||||
labels: { 'io.balena.update.requires-reboot': 'true' },
|
||||
running: false,
|
||||
},
|
||||
{ state: { createdAt: new Date(), status: 'Installed' } },
|
||||
),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
|
||||
// Now test that another start step is not added on this service
|
||||
const target = createApp({
|
||||
services: [
|
||||
await createService({
|
||||
labels: { 'io.balena.update.requires-reboot': 'true' },
|
||||
running: true,
|
||||
}),
|
||||
],
|
||||
isTarget: true,
|
||||
});
|
||||
|
||||
const steps = current.nextStepsForAppUpdate(
|
||||
{
|
||||
...defaultContext,
|
||||
rebootBreadcrumbSet: true,
|
||||
bootTime: new Date(Date.now() - 30 * 60 * 1000),
|
||||
},
|
||||
target,
|
||||
);
|
||||
expect(steps.length).to.equal(0);
|
||||
expectNoStep('start', steps);
|
||||
});
|
||||
|
||||
it('should start a container with `requires-reboot` after reboot has taken place', async () => {
|
||||
// Container is a "run once" type of service so it has exited.
|
||||
const current = createApp({
|
||||
services: [
|
||||
await createService(
|
||||
{
|
||||
labels: { 'io.balena.update.requires-reboot': 'true' },
|
||||
running: false,
|
||||
},
|
||||
// Container was created 5 minutes ago
|
||||
{
|
||||
state: {
|
||||
createdAt: new Date(Date.now() - 5 * 60 * 1000),
|
||||
status: 'Installed',
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
networks: [DEFAULT_NETWORK],
|
||||
});
|
||||
|
||||
// Now test that another start step is not added on this service
|
||||
const target = createApp({
|
||||
services: [
|
||||
await createService({
|
||||
labels: { 'io.balena.update.requires-reboot': 'true' },
|
||||
running: true,
|
||||
}),
|
||||
],
|
||||
isTarget: true,
|
||||
});
|
||||
|
||||
const steps = current.nextStepsForAppUpdate(
|
||||
{
|
||||
...defaultContext,
|
||||
rebootBreadcrumbSet: true,
|
||||
// Reboot just happened
|
||||
bootTime: new Date(),
|
||||
},
|
||||
target,
|
||||
);
|
||||
expect(steps.length).to.equal(1);
|
||||
expectSteps('start', steps);
|
||||
});
|
||||
});
|
||||
|
||||
describe('image state behavior', () => {
|
||||
@ -2197,5 +2399,19 @@ describe('compose/app', () => {
|
||||
const [releaseLockStep] = expectSteps('releaseLock', steps, 1);
|
||||
expect(releaseLockStep).to.have.property('appId').that.equals(1);
|
||||
});
|
||||
|
||||
it('should infer a releaseLock step when removing an app', async () => {
|
||||
const current = createApp({
|
||||
services: [],
|
||||
networks: [],
|
||||
});
|
||||
|
||||
const steps = current.stepsToRemoveApp({
|
||||
...defaultContext,
|
||||
lock: mockLock,
|
||||
});
|
||||
const [releaseLockStep] = expectSteps('releaseLock', steps, 1);
|
||||
expect(releaseLockStep).to.have.property('appId').that.equals(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user