mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-06-04 08:40:49 +00:00
Improve the update strategies:
* On handover, fetch old app from DB before starting the new app (and overwriting the DB record)
* Tidy up the logging
* Fix waitToKill so that it actually works
* Several other fixups
This commit is contained in:
parent
1d6811a423
commit
cbb3e2f461
@ -13,6 +13,10 @@ logger = require './lib/logger'
|
|||||||
device = require './device'
|
device = require './device'
|
||||||
lockFile = Promise.promisifyAll(require('lockfile'))
|
lockFile = Promise.promisifyAll(require('lockfile'))
|
||||||
bootstrap = require './bootstrap'
|
bootstrap = require './bootstrap'
|
||||||
|
TypedError = require 'typed-error'
|
||||||
|
fs = Promise.promisifyAll(require('fs'))
|
||||||
|
|
||||||
|
class UpdatesLockedError extends TypedError
|
||||||
|
|
||||||
{ docker } = dockerUtils
|
{ docker } = dockerUtils
|
||||||
|
|
||||||
@ -82,7 +86,7 @@ logSystemEvent = (logType, app, error) ->
|
|||||||
|
|
||||||
application = {}
|
application = {}
|
||||||
|
|
||||||
application.kill = kill = (app) ->
|
application.kill = kill = (app, updateDB = true) ->
|
||||||
logSystemEvent(logTypes.stopApp, app)
|
logSystemEvent(logTypes.stopApp, app)
|
||||||
device.updateState(status: 'Stopping')
|
device.updateState(status: 'Stopping')
|
||||||
container = docker.getContainer(app.containerId)
|
container = docker.getContainer(app.containerId)
|
||||||
@ -110,8 +114,9 @@ application.kill = kill = (app) ->
|
|||||||
lockFile.unlockAsync(lockPath(app))
|
lockFile.unlockAsync(lockPath(app))
|
||||||
.tap ->
|
.tap ->
|
||||||
logSystemEvent(logTypes.stopAppSuccess, app)
|
logSystemEvent(logTypes.stopAppSuccess, app)
|
||||||
app.containerId = null
|
if updateDB == true
|
||||||
knex('app').update(app).where(appId: app.appId)
|
app.containerId = null
|
||||||
|
knex('app').update(app).where(appId: app.appId)
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
logSystemEvent(logTypes.stopAppError, app, err)
|
logSystemEvent(logTypes.stopAppError, app, err)
|
||||||
throw err
|
throw err
|
||||||
@ -274,9 +279,7 @@ application.lockUpdates = lockUpdates = do ->
|
|||||||
.catch ENOENT, _.noop
|
.catch ENOENT, _.noop
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
release()
|
release()
|
||||||
err = new Error("Updates are locked: #{err.message}")
|
throw new UpdatesLockedError("Updates are locked: #{err.message}")
|
||||||
err.isLocked = true
|
|
||||||
throw err
|
|
||||||
.disposer (release) ->
|
.disposer (release) ->
|
||||||
Promise.try ->
|
Promise.try ->
|
||||||
lockFile.unlockAsync(lockName) if force != true
|
lockFile.unlockAsync(lockName) if force != true
|
||||||
@ -331,27 +334,31 @@ wrapAsError = (err) ->
|
|||||||
return err if _.isError(err)
|
return err if _.isError(err)
|
||||||
return new Error(err.message ? err)
|
return new Error(err.message ? err)
|
||||||
|
|
||||||
selectAndKill = (appId) ->
|
select = (appId) ->
|
||||||
knex('app').select().where({ appId })
|
knex('app').select().where({ appId })
|
||||||
.then ([ app ]) ->
|
.then ([ app ]) ->
|
||||||
if !app?
|
if !app?
|
||||||
throw new Error('App not found')
|
throw new Error('App not found')
|
||||||
kill(app)
|
return app
|
||||||
|
|
||||||
# Wait for app to signal it's ready to die, or timeout to complete (if it is defined and not-empty)
|
# Wait for app to signal it's ready to die, or timeout to complete.
|
||||||
|
# timeout defaults to 1 minute.
|
||||||
waitToKill = (app, timeout) ->
|
waitToKill = (app, timeout) ->
|
||||||
startTime = Date.now()
|
startTime = Date.now()
|
||||||
pollInterval = 100
|
pollInterval = 100
|
||||||
timeout = parseInt(timeout)
|
timeout = parseInt(timeout)
|
||||||
|
timeout = 60000 if isNaN(timeout)
|
||||||
checkFileOrTimeout = ->
|
checkFileOrTimeout = ->
|
||||||
fs.statAsync(killmePath(app))
|
fs.statAsync(killmePath(app))
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
throw err if isNaN(timeout) or (Date.now() - startTime) < timeout
|
throw err unless (Date.now() - startTime) > timeout
|
||||||
.then ->
|
.then ->
|
||||||
fs.unlinkAsync(killmePath(app)).catch(_.noop)
|
fs.unlinkAsync(killmePath(app)).catch(_.noop)
|
||||||
checkFileOrTimeout()
|
retryCheck = ->
|
||||||
.catch ->
|
checkFileOrTimeout()
|
||||||
Promise.delay(pollInterval).then(checkFileOrTimeout)
|
.catch ->
|
||||||
|
Promise.delay(pollInterval).then(retryCheck)
|
||||||
|
retryCheck()
|
||||||
|
|
||||||
UPDATE_IDLE = 0
|
UPDATE_IDLE = 0
|
||||||
UPDATE_UPDATING = 1
|
UPDATE_UPDATING = 1
|
||||||
@ -364,49 +371,53 @@ updateStatus =
|
|||||||
intervalHandle: null
|
intervalHandle: null
|
||||||
|
|
||||||
updateStrategies =
|
updateStrategies =
|
||||||
'normal-update': (localApp, app, needsDownload, force, timeout) ->
|
'download-then-kill': ({ localApp, app, needsDownload, force }) ->
|
||||||
Promise.try ->
|
Promise.try ->
|
||||||
fetch(app) if needsDownload
|
fetch(app) if needsDownload
|
||||||
.then ->
|
.then ->
|
||||||
Promise.using lockUpdates(localApp, force), ->
|
Promise.using lockUpdates(localApp, force), ->
|
||||||
logSystemEvent(logTypes.updateApp, app) if localApp.imageId == app.imageId
|
logSystemEvent(logTypes.updateApp, app) if localApp.imageId == app.imageId
|
||||||
selectAndKill(localApp.appId)
|
select(localApp.appId)
|
||||||
|
.then(kill)
|
||||||
.then ->
|
.then ->
|
||||||
start(app)
|
start(app)
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
logSystemEvent(logTypes.updateAppError, app, err) unless err.isLocked?
|
logSystemEvent(logTypes.updateAppError, app, err) unless err instanceof UpdatesLockedError
|
||||||
throw err
|
throw err
|
||||||
'kill-before-download': (localApp, app, needsDownload, force, timeout) ->
|
'kill-then-download': ({ localApp, app, needsDownload, force }) ->
|
||||||
logSystemEvent(logTypes.updateApp, app) if localApp.imageId == app.imageId
|
|
||||||
Promise.using lockUpdates(localApp, force), ->
|
Promise.using lockUpdates(localApp, force), ->
|
||||||
selectAndKill(localApp.appId)
|
logSystemEvent(logTypes.updateApp, app) if localApp.imageId == app.imageId
|
||||||
|
select(localApp.appId)
|
||||||
|
.then(kill)
|
||||||
.then ->
|
.then ->
|
||||||
fetch(app) if needsDownload
|
fetch(app) if needsDownload
|
||||||
.then ->
|
.then ->
|
||||||
start(app)
|
start(app)
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
logSystemEvent(logTypes.updateAppError, app, err) unless err.isLocked?
|
logSystemEvent(logTypes.updateAppError, app, err) unless err instanceof UpdatesLockedError
|
||||||
throw err
|
throw err
|
||||||
'hand-over': (localApp, app, needsDownload, force, timeout) ->
|
'hand-over': ({ localApp, app, needsDownload, force, timeout }) ->
|
||||||
Promise.using lockUpdates(localApp, force), ->
|
Promise.using lockUpdates(localApp, force), ->
|
||||||
Promise.try ->
|
select(localApp.appId)
|
||||||
fetch(app) if needsDownload
|
.then (localApp) ->
|
||||||
.then ->
|
Promise.try ->
|
||||||
logSystemEvent(logTypes.updateApp, app) if localApp.imageId == app.imageId
|
fetch(app) if needsDownload
|
||||||
start(app)
|
.then ->
|
||||||
.then ->
|
logSystemEvent(logTypes.updateApp, app) if localApp.imageId == app.imageId
|
||||||
waitToKill(localApp, timeout)
|
start(app)
|
||||||
.then ->
|
.then ->
|
||||||
selectAndKill(localApp.appId)
|
waitToKill(localApp, timeout)
|
||||||
|
.then ->
|
||||||
|
kill(localApp, false)
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
logSystemEvent(logTypes.updateAppError, app, err) unless err.isLocked?
|
logSystemEvent(logTypes.updateAppError, app, err) unless err instanceof UpdatesLockedError
|
||||||
throw err
|
throw err
|
||||||
|
|
||||||
|
|
||||||
updateUsingStrategy = (strategy, localApp, app, needsDownload, force, timeout) ->
|
updateUsingStrategy = (strategy, options) ->
|
||||||
if strategy not in _.keys(updateStrategies)
|
if not _.has(updateStrategies, strategy)
|
||||||
strategy = 'normal-update'
|
strategy = 'download-then-kill'
|
||||||
updateStrategies[strategy](localApp, app, needsDownload, force, timeout)
|
updateStrategies[strategy](options)
|
||||||
|
|
||||||
getRemoteApps = (uuid, apiKey) ->
|
getRemoteApps = (uuid, apiKey) ->
|
||||||
cachedResinApi.get
|
cachedResinApi.get
|
||||||
@ -430,12 +441,11 @@ getEnvAndFormatRemoteApps = (deviceId, remoteApps, uuid, apiKey) ->
|
|||||||
.then (environment) ->
|
.then (environment) ->
|
||||||
app.environment_variable = environment
|
app.environment_variable = environment
|
||||||
utils.extendEnvVars(app.environment_variable, uuid)
|
utils.extendEnvVars(app.environment_variable, uuid)
|
||||||
.then (env) ->
|
.then (fullEnv) ->
|
||||||
fullEnv = env
|
env = _.omit(fullEnv, _.keys(specialActionEnvVars))
|
||||||
env = _.omit(env, _.keys(specialActionEnvVars))
|
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
appId: app.id
|
appId: '' + app.id
|
||||||
env: fullEnv
|
env: fullEnv
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -445,6 +455,7 @@ getEnvAndFormatRemoteApps = (deviceId, remoteApps, uuid, apiKey) ->
|
|||||||
env: JSON.stringify(env) # The env has to be stored as a JSON string for knex
|
env: JSON.stringify(env) # The env has to be stored as a JSON string for knex
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
.then(_.flatten)
|
||||||
.then(_.zip)
|
.then(_.zip)
|
||||||
.then ([ remoteAppEnvs, remoteApps ]) ->
|
.then ([ remoteAppEnvs, remoteApps ]) ->
|
||||||
return [_.mapValues(_.indexBy(remoteAppEnvs, 'appId'), 'env'), _.indexBy(remoteApps, 'appId')]
|
return [_.mapValues(_.indexBy(remoteAppEnvs, 'appId'), 'env'), _.indexBy(remoteApps, 'appId')]
|
||||||
@ -476,6 +487,9 @@ compareForUpdate = (localApps, remoteApps, localAppEnvs, remoteAppEnvs) ->
|
|||||||
allAppIds = _.union(localAppIds, remoteAppIds)
|
allAppIds = _.union(localAppIds, remoteAppIds)
|
||||||
return { toBeRemoved, toBeDownloaded, toBeInstalled, toBeUpdated, appsWithChangedEnvs, allAppIds }
|
return { toBeRemoved, toBeDownloaded, toBeInstalled, toBeUpdated, appsWithChangedEnvs, allAppIds }
|
||||||
|
|
||||||
|
getConfig = (key) ->
|
||||||
|
knex('config').select('value').where({ key }).get(0).get('value')
|
||||||
|
|
||||||
application.update = update = (force) ->
|
application.update = update = (force) ->
|
||||||
if updateStatus.state isnt UPDATE_IDLE
|
if updateStatus.state isnt UPDATE_IDLE
|
||||||
# Mark an update required after the current.
|
# Mark an update required after the current.
|
||||||
@ -484,15 +498,7 @@ application.update = update = (force) ->
|
|||||||
return
|
return
|
||||||
updateStatus.state = UPDATE_UPDATING
|
updateStatus.state = UPDATE_UPDATING
|
||||||
bootstrap.done.then ->
|
bootstrap.done.then ->
|
||||||
Promise.all([
|
Promise.join getConfig('apiKey'), getConfig('uuid'), knex('app').select(), (apiKey, uuid, apps) ->
|
||||||
knex('config').select('value').where(key: 'apiKey')
|
|
||||||
knex('config').select('value').where(key: 'uuid')
|
|
||||||
knex('app').select()
|
|
||||||
])
|
|
||||||
.then ([ [ apiKey ], [ uuid ], apps ]) ->
|
|
||||||
apiKey = apiKey.value
|
|
||||||
uuid = uuid.value
|
|
||||||
|
|
||||||
deviceId = device.getID()
|
deviceId = device.getID()
|
||||||
remoteApps = getRemoteApps(uuid, apiKey)
|
remoteApps = getRemoteApps(uuid, apiKey)
|
||||||
|
|
||||||
@ -523,10 +529,11 @@ application.update = update = (force) ->
|
|||||||
Promise.try ->
|
Promise.try ->
|
||||||
needsDownload = _.includes(toBeDownloaded, appId)
|
needsDownload = _.includes(toBeDownloaded, appId)
|
||||||
if _.includes(toBeRemoved, appId)
|
if _.includes(toBeRemoved, appId)
|
||||||
Promise.using lockUpdates(apps[appId], force), ->
|
Promise.using lockUpdates(localApps[appId], force), ->
|
||||||
# We get the app from the DB again in case someone restarted it
|
# We get the app from the DB again in case someone restarted it
|
||||||
# (which would have changed its containerId)
|
# (which would have changed its containerId)
|
||||||
selectAndKill(appId)
|
select(appId)
|
||||||
|
.then(kill)
|
||||||
.then ->
|
.then ->
|
||||||
knex('app').where('appId', appId).delete()
|
knex('app').where('appId', appId).delete()
|
||||||
.catch (err) ->
|
.catch (err) ->
|
||||||
@ -544,13 +551,22 @@ application.update = update = (force) ->
|
|||||||
app = remoteApps[appId]
|
app = remoteApps[appId]
|
||||||
# Restore the complete environment so that it's persisted in the DB
|
# Restore the complete environment so that it's persisted in the DB
|
||||||
app.env = JSON.stringify(remoteAppEnvs[appId])
|
app.env = JSON.stringify(remoteAppEnvs[appId])
|
||||||
forceThisApp = remoteAppEnvs[appId]['RESIN_SUPERVISOR_OVERRIDE_LOCK'] == '1' || remoteAppEnvs[appId]['RESIN_OVERRIDE_LOCK'] == '1'
|
forceThisApp =
|
||||||
|
remoteAppEnvs[appId]['RESIN_SUPERVISOR_OVERRIDE_LOCK'] == '1' ||
|
||||||
|
remoteAppEnvs[appId]['RESIN_OVERRIDE_LOCK'] == '1'
|
||||||
strategy = remoteAppEnvs[appId]['RESIN_SUPERVISOR_UPDATE_STRATEGY']
|
strategy = remoteAppEnvs[appId]['RESIN_SUPERVISOR_UPDATE_STRATEGY']
|
||||||
timeout = remoteAppEnvs[appId]['RESIN_SUPERVISOR_HANDOVER_TIMEOUT']
|
timeout = remoteAppEnvs[appId]['RESIN_SUPERVISOR_HANDOVER_TIMEOUT']
|
||||||
updateUsingStrategy(strategy, apps[appId], app, needsDownload, force || forceThisApp, timeout)
|
updateUsingStrategy strategy, {
|
||||||
|
localApp: localApps[appId]
|
||||||
|
app
|
||||||
|
needsDownload
|
||||||
|
force: force || forceThisApp
|
||||||
|
timeout
|
||||||
|
}
|
||||||
.catch(wrapAsError)
|
.catch(wrapAsError)
|
||||||
.filter(_.isError)
|
.filter(_.isError)
|
||||||
.then (failures) ->
|
.then (failures) ->
|
||||||
|
_.each(failures, (err) -> console.error('Error:', err, err.stack))
|
||||||
throw new Error(joinErrorMessages(failures)) if failures.length > 0
|
throw new Error(joinErrorMessages(failures)) if failures.length > 0
|
||||||
.then ->
|
.then ->
|
||||||
updateStatus.failed = 0
|
updateStatus.failed = 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user