mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-02-25 11:12:47 +00:00
Applying a delta update consists of two parts: 1. The request to the delta server for the delta payload (an rsync batch file, plus some prepended Docker metadata). The response is a redirect to a URL that contains the delta (currently S3). 2. The request for the actual download of the delta. The response is streamed directly to rsync, which applies it onto the mounted root filesystem of the final image. The first step may take a while as it may trigger the generation of the delta if the request is the first one for this combination of src/dest image and the images are large. If the request times out, either because of the delta server taking too long to respond or because of a bad network connection, the Supervisor automatically schedules a retry to be performed after a while. Currently, similar behaviour applies to the second step as well -- if the request fails, we immediately bail out and the Supervisor schedules a retry of the whole process (i.e. from step 1). But in this case it means we might have downloaded and applied some or most of the delta when a socket timeout occurs, causing us to start all over again, wasting time and bandwidth. This commit splits the process into two discrete steps and improves the behaviour of the second step. Specifically, it: - makes the Supervisor try to resume the delta download request several times before it bails out and starts the process all over again. - removes the arbitrary timeout which applied over the whole process and meant some deltas would never manage to be applied (because of large delta size and low network bandwidth). - makes sure any launched rsync processes always exit and any opened streams are consumed and closed. Most of the improvements are in the two dependencies linked below -- `resumable-request` and `node-docker-delta` -- and this commit merely combines the updated versions of these modules. 
Change-Type: minor Connects-To: #140 Depends-On: https://github.com/resin-io/node-docker-delta/pull/19 Depends-On: https://github.com/resin-io-modules/resumable-request/pull/2
52 lines
1.5 KiB
CoffeeScript
52 lines
1.5 KiB
CoffeeScript
config = require './config'
|
|
PlatformAPI = require 'pinejs-client'
|
|
Promise = require 'bluebird'
|
|
request = require 'request'
|
|
resumable = require 'resumable-request'
|
|
url = require 'url'
|
|
osRelease = require './lib/os-release'
|
|
|
|
# Host OS version and variant, looked up through the os-release helper using
# the path in config.hostOSVersionPath. Either value may be absent, which is
# why both are existence-checked before being used below.
osVersion = osRelease.getOSVersionSync(config.hostOSVersionPath)
osVariant = osRelease.getOSVariantSync(config.hostOSVersionPath)
supervisorVersion = require('./lib/supervisor-version')

# User-Agent string: always "Supervisor/<version>"; when the OS version is
# known it is followed by " (Linux; <osVersion>)", with "; <osVariant>"
# inserted before the closing parenthesis when the variant is known too.
userAgent = "Supervisor/#{supervisorVersion}"
if osVersion?
  variantSuffix = if osVariant? then "; #{osVariant}" else ''
  userAgent += " (Linux; #{osVersion}#{variantSuffix})"
|
|
|
|
# Retry budget for network requests. With the values below, the device has
# to go a continuous 20 minutes without receiving a single byte from the
# network before we give up:
#   (timeout + retry interval) * retry count = (30s + 10s) * 30 = 1200s
DEFAULT_REQUEST_TIMEOUT = 30 * 1000 # ms
DEFAULT_REQUEST_RETRY_INTERVAL = 10 * 1000 # ms
DEFAULT_REQUEST_RETRY_COUNT = 30
|
|
|
|
# Default options handed to `request.defaults` and passed through to the
# API client: gzip enabled, the default timeout, and our User-Agent header.
requestOpts = {
  gzip: true
  timeout: DEFAULT_REQUEST_TIMEOUT
  headers: { 'User-Agent': userAgent }
}

# Default options handed to `resumable.defaults`: how many times to retry
# and how long to wait between retries.
resumableOpts = {
  maxRetries: DEFAULT_REQUEST_RETRY_COUNT
  retryInterval: DEFAULT_REQUEST_RETRY_INTERVAL
}
|
|
|
|
# Build the API clients rooted at "<apiEndpoint>/v2/". If anything in the
# setup throws, fall back to exporting empty objects (two distinct ones) so
# that both export names are always defined.
try
  PLATFORM_ENDPOINT = url.resolve(config.apiEndpoint, '/v2/')
  resinApi = new PlatformAPI({
    apiPrefix: PLATFORM_ENDPOINT
    passthrough: requestOpts
  })
  exports.resinApi = resinApi
  # Clone of the client above, created with an (initially empty) cache option.
  exports.cachedResinApi = resinApi.clone({}, { cache: {} })
catch
  exports.resinApi = {}
  exports.cachedResinApi = {}
|
|
|
|
# Rebind `request` to a variant with our default options applied, then
# export it promisified with `multiArgs: true`.
request = request.defaults(requestOpts)
exports.request = Promise.promisifyAll(request, { multiArgs: true })

# Resumable requests preconfigured with the default retry settings.
exports.resumable = resumable.defaults(resumableOpts)
|