Auto-merge for PR #507 via VersionBot

Better container management to avoid duplicated/spurious containers
This commit is contained in:
resin-io-versionbot[bot] 2017-10-31 00:26:07 +00:00 committed by GitHub
commit 0b86a60618
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 166 additions and 112 deletions

View File

@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file
automatically by Versionist. DO NOT EDIT THIS FILE MANUALLY!
This project adheres to [Semantic Versioning](http://semver.org/).
## v6.3.9 - 2017-10-30
* Avoid fetching an image when it might be available or when starting an app because it might not be necessary #507 [Pablo Carranza Velez]
* Refactor container cleanup to remove all spurious containers #507 [Pablo Carranza Velez]
* Use container name instead of id to identify apps, and avoid duplicated containers #507 [Pablo Carranza Velez]
## v6.3.8 - 2017-10-30
* If a device is already provisioned but the key exchange fails, retry it until it succeeds #513 [Pablo Carranza Velez]

View File

@ -1,7 +1,7 @@
{
"name": "resin-supervisor",
"description": "This is resin.io's Supervisor, a program that runs on IoT devices and has the task of running user Apps (which are Docker containers), and updating them as Resin's API informs it to.",
"version": "6.3.8",
"version": "6.3.9",
"license": "Apache-2.0",
"repository": {
"type": "git",

View File

@ -154,10 +154,12 @@ module.exports = (application) ->
return res.status(400).send('Missing app id')
Promise.using application.lockUpdates(appId, force), ->
utils.getKnexApp(appId)
.tap (app) ->
application.kill(app, removeContainer: false)
.then (app) ->
res.json(_.pick(app, 'containerId'))
application.getContainerId(app)
.then (containerId) ->
application.kill(app, removeContainer: false)
.then (app) ->
res.json({ containerId })
.catch utils.AppNotFoundError, (e) ->
return res.status(400).send(e.message)
.catch (err) ->
@ -172,8 +174,10 @@ module.exports = (application) ->
utils.getKnexApp(appId)
.tap (app) ->
application.start(app)
.then (app) ->
res.json(_.pick(app, 'containerId'))
.then ->
application.getContainerId(app)
.then (containerId) ->
res.json({ containerId })
.catch utils.AppNotFoundError, (e) ->
return res.status(400).send(e.message)
.catch (err) ->
@ -185,13 +189,16 @@ module.exports = (application) ->
if !appId?
return res.status(400).send('Missing app id')
Promise.using application.lockUpdates(appId, true), ->
columns = [ 'appId', 'containerId', 'commit', 'imageId', 'env' ]
columns = [ 'appId', 'containerName', 'commit', 'imageId', 'env' ]
utils.getKnexApp(appId, columns)
.then (app) ->
# Don't return keys on the endpoint
app.env = _.omit(JSON.parse(app.env), config.privateAppEnvVars)
# Don't return data that will be of no use to the user
res.json(app)
application.getContainerId(app)
.then (containerId) ->
# Don't return keys on the endpoint
app.env = _.omit(JSON.parse(app.env), config.privateAppEnvVars)
app.containerId = containerId
# Don't return data that will be of no use to the user
res.json(_.omit(app, [ 'containerName' ]))
.catch utils.AppNotFoundError, (e) ->
return res.status(400).send(e.message)
.catch (err) ->

View File

@ -18,6 +18,7 @@ proxyvisor = require './proxyvisor'
{ checkInt, checkTruthy } = require './lib/validation'
osRelease = require './lib/os-release'
deviceConfig = require './device-config'
randomHexString = require './lib/random-hex-string'
class UpdatesLockedError extends TypedError
ImageNotFoundError = (err) ->
@ -155,7 +156,7 @@ logSpecialAction = (action, value, success) ->
application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}) ->
logSystemEvent(logTypes.stopApp, app)
device.updateState(status: 'Stopping')
container = docker.getContainer(app.containerId)
container = docker.getContainer(app.containerName)
container.stop(t: 10)
.then ->
container.remove(v: true) if removeContainer
@ -183,7 +184,7 @@ application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}
.tap ->
logSystemEvent(logTypes.stopAppSuccess, app)
if removeContainer && updateDB
app.containerId = null
app.containerName = null
knex('app').update(app).where(appId: app.appId)
.catch (err) ->
logSystemEvent(logTypes.stopAppError, app, err)
@ -209,7 +210,7 @@ fetch = (app, { deltaSource, setDeviceUpdateState = true } = {}) ->
device.updateState(download_progress: progress.percentage)
docker.getImage(app.imageId).inspect()
.catch (error) ->
.catch ImageNotFoundError, ->
device.updateState(status: 'Downloading', download_progress: 0)
Promise.try ->
@ -255,6 +256,12 @@ isExecFormatError = (err) ->
message = err.json.trim()
/exec format error$/.test(message)
generateContainerName = (app) ->
randomHexString.generate()
.then (randomString) ->
sanitisedAppName = app.name.replace(/[^a-zA-Z0-9]+/g, '_')
return "#{sanitisedAppName}_#{randomString}"
application.start = start = (app) ->
device.isResinOSv1().then (isV1) ->
volumes = utils.defaultVolumes(isV1)
@ -270,50 +277,58 @@ application.start = start = (app) ->
.map((port) -> port.trim())
.filter(isValidPort)
if app.containerId?
if app.containerName?
# If we have a container id then check it exists and if so use it.
container = docker.getContainer(app.containerId)
container = docker.getContainer(app.containerName)
containerPromise = container.inspect().return(container)
else
containerPromise = Promise.rejected()
# If there is no existing container then create one instead.
containerPromise.catch ->
fetch(app)
# If the image is not available, we'll let the update cycle fix it
docker.getImage(app.imageId).inspect()
.then (imageInfo) ->
logSystemEvent(logTypes.installApp, app)
device.updateState(status: 'Installing')
generateContainerName(app)
.tap (name) ->
app.containerName = name
knex('app').update(app).where(appId: app.appId)
.then (affectedRows) ->
knex('app').insert(app) if affectedRows == 0
.then (name) ->
ports = {}
portBindings = {}
if portList?
portList.forEach (port) ->
ports[port + '/tcp'] = {}
portBindings[port + '/tcp'] = [ HostPort: port ]
ports = {}
portBindings = {}
if portList?
portList.forEach (port) ->
ports[port + '/tcp'] = {}
portBindings[port + '/tcp'] = [ HostPort: port ]
if imageInfo?.Config?.Cmd
cmd = imageInfo.Config.Cmd
else
cmd = [ '/bin/bash', '-c', '/start' ]
if imageInfo?.Config?.Cmd
cmd = imageInfo.Config.Cmd
else
cmd = [ '/bin/bash', '-c', '/start' ]
restartPolicy = createRestartPolicy({ name: conf['RESIN_APP_RESTART_POLICY'], maximumRetryCount: conf['RESIN_APP_RESTART_RETRIES'] })
shouldMountKmod(app.imageId)
.then (shouldMount) ->
binds.push('/bin/kmod:/bin/kmod:ro') if shouldMount
docker.createContainer(
Image: app.imageId
Cmd: cmd
Tty: true
Volumes: volumes
Env: _.map env, (v, k) -> k + '=' + v
ExposedPorts: ports
HostConfig:
Privileged: true
NetworkMode: 'host'
PortBindings: portBindings
Binds: binds
RestartPolicy: restartPolicy
)
restartPolicy = createRestartPolicy({ name: conf['RESIN_APP_RESTART_POLICY'], maximumRetryCount: conf['RESIN_APP_RESTART_RETRIES'] })
shouldMountKmod(app.imageId)
.then (shouldMount) ->
binds.push('/bin/kmod:/bin/kmod:ro') if shouldMount
docker.createContainer(
name: name
Image: app.imageId
Cmd: cmd
Tty: true
Volumes: volumes
Env: _.map env, (v, k) -> k + '=' + v
ExposedPorts: ports
HostConfig:
Privileged: true
NetworkMode: 'host'
PortBindings: portBindings
Binds: binds
RestartPolicy: restartPolicy
)
.tap ->
logSystemEvent(logTypes.installAppSuccess, app)
.catch (err) ->
@ -341,21 +356,20 @@ application.start = start = (app) ->
.catch (err) ->
# If starting the container failed, we remove it so that it doesn't litter
container.remove(v: true)
.then ->
app.containerId = null
knex('app').update(app).where(appId: app.appId)
.finally ->
logSystemEvent(logTypes.startAppError, app, err)
throw err
.then ->
app.containerId = container.id
device.updateState(commit: app.commit)
logger.attach(app)
.tap (container) ->
# Update the app info, only if starting the container worked.
knex('app').update(app).where(appId: app.appId)
.then (affectedRows) ->
knex('app').insert(app) if affectedRows == 0
.catch (err) ->
# If creating or starting the app failed, we soft-delete it so that
# the next update cycle doesn't think the app is up to date
app.containerName = null
app.markedForDeletion = true
knex('app').update(app).where(appId: app.appId)
.finally ->
logSystemEvent(logTypes.startAppError, app, err)
throw err
.tap ->
if alreadyStarted
logSystemEvent(logTypes.startAppNoop, app)
@ -765,7 +779,7 @@ application.update = update = (force, scheduled = false) ->
app = localApps[appId]
Promise.using lockUpdates(app, force), ->
# We get the app from the DB again in case someone restarted it
# (which would have changed its containerId)
# (which would have changed its containerName)
utils.getKnexApp(appId)
.then(kill)
.then ->
@ -848,6 +862,8 @@ application.update = update = (force, scheduled = false) ->
device.updateState(status: 'Idle')
return
sanitiseContainerName = (name) -> name.replace(/^\//, '')
listenToEvents = do ->
appHasDied = {}
return ->
@ -859,14 +875,14 @@ listenToEvents = do ->
parser.on 'error', (err) ->
console.error('Error on docker events JSON stream:', err, err.stack)
parser.on 'data', (data) ->
if data?.Type? && data.Type == 'container' && data.status in ['die', 'start']
knex('app').select().where({ containerId: data.id })
if data?.Type? && data.Type == 'container' && data.status in ['die', 'start'] && data.Actor?.Attributes?.Name?
knex('app').select().where({ containerName: sanitiseContainerName(data.Actor.Attributes.Name) })
.then ([ app ]) ->
if app?
if data.status == 'die'
logSystemEvent(logTypes.appExit, app)
appHasDied[app.containerId] = true
else if data.status == 'start' and appHasDied[app.containerId]
appHasDied[app.containerName] = true
else if data.status == 'start' and appHasDied[app.containerName]
logSystemEvent(logTypes.appRestart, app)
logger.attach(app)
.catch (err) ->
@ -878,9 +894,35 @@ listenToEvents = do ->
.catch (err) ->
console.error('Error listening to events:', err, err.stack)
migrateContainerIdApps = ->
knex.schema.hasColumn('app', 'containerId')
.then (exists) ->
return if not exists
knex('app').whereNotNull('containerId').select()
.then (apps) ->
return if !apps? or apps.length == 0
Promise.map apps, (app) ->
docker.getContainer(app.containerId).inspect()
.catchReturn({ Name: null })
.then (container) ->
app.containerName = sanitiseContainerName(container.Name)
app.containerId = null
knex('app').update(app).where({ id: app.id })
.then ->
knex.schema.table 'app', (table) ->
table.dropColumn('containerId')
application.getContainerId = (app) ->
docker.getContainer(app.containerName).inspect()
.catchReturn({ Id: null })
.then (container) ->
return container.Id
application.initialize = ->
listenToEvents()
getAndApplyDeviceConfig()
migrateContainerIdApps()
.then ->
getAndApplyDeviceConfig()
.then ->
knex('app').whereNot(markedForDeletion: true).orWhereNull('markedForDeletion').select()
.map (app) ->

View File

@ -40,7 +40,7 @@ knex.init = Promise.all([
knex.schema.createTable 'app', (t) ->
t.increments('id').primary()
t.string('name')
t.string('containerId')
t.string('containerName')
t.string('commit')
t.string('imageId')
t.string('appId')
@ -52,6 +52,7 @@ knex.init = Promise.all([
Promise.all [
addColumn('app', 'commit', 'string')
addColumn('app', 'appId', 'string')
addColumn('app', 'containerName', 'string')
addColumn('app', 'config', 'json')
addColumn('app', 'markedForDeletion', 'boolean')
]

View File

@ -138,62 +138,60 @@ do ->
Promise.using writeLockImages(), ->
Promise.join(
knex('app').select()
.map ({ imageId }) ->
normalizeRepoTag(imageId)
.map (app) ->
app.imageId = normalizeRepoTag(app.imageId)
return Promise.props(app)
knex('dependentApp').select().whereNotNull('imageId')
.map ({ imageId }) ->
normalizeRepoTag(imageId)
return normalizeRepoTag(imageId)
supervisorTagPromise
docker.listImages()
.map (image) ->
image.NormalizedRepoTags = Promise.map(image.RepoTags, normalizeRepoTag)
Promise.props(image)
return Promise.props(image)
Promise.map(extraImagesToIgnore, normalizeRepoTag)
(apps, dependentApps, supervisorTag, images, normalizedExtraImages) ->
appNames = _.map(apps, 'containerName')
appImages = _.map(apps, 'imageId')
imageTags = _.map(images, 'NormalizedRepoTags')
supervisorTags = _.filter imageTags, (tags) ->
_.includes(tags, supervisorTag)
appTags = _.filter imageTags, (tags) ->
_.some tags, (tag) ->
_.includes(apps, tag) or _.includes(dependentApps, tag)
extraTags = _.filter imageTags, (tags) ->
_.some tags, (tag) ->
_.includes(normalizedExtraImages, tag)
supervisorTags = _.flatten(supervisorTags)
_.includes(appImages, tag) or _.includes(dependentApps, tag)
appTags = _.flatten(appTags)
supervisorTags = _.filter imageTags, (tags) ->
_.includes(tags, supervisorTag)
supervisorTags = _.flatten(supervisorTags)
extraTags = _.filter imageTags, (tags) ->
_.some(tags, (tag) -> _.includes(normalizedExtraImages, tag))
extraTags = _.flatten(extraTags)
return { images, supervisorTags, appTags, extraTags }
)
.then ({ images, supervisorTags, appTags, extraTags }) ->
# Cleanup containers first, so that they don't block image removal.
docker.listContainers(all: true)
.filter (containerInfo) ->
# Do not remove user apps.
normalizeRepoTag(containerInfo.Image)
.then (repoTag) ->
if _.includes(appTags, repoTag)
return false
if _.includes(extraTags, repoTag)
return false
if !_.includes(supervisorTags, repoTag)
allProtectedTags = _.union(appTags, supervisorTags, extraTags)
# Cleanup containers first, so that they don't block image removal.
docker.listContainers(all: true)
.filter (containerInfo) ->
# Do not remove user apps.
normalizeRepoTag(containerInfo.Image)
.then (repoTag) ->
if _.includes(appTags, repoTag)
return !_.some containerInfo.Names, (name) ->
_.some appNames, (appContainerName) -> "/#{appContainerName}" == name
if _.includes(supervisorTags, repoTag)
return containerHasExited(containerInfo.Id)
return true
return containerHasExited(containerInfo.Id)
.map (containerInfo) ->
docker.getContainer(containerInfo.Id).remove(v: true, force: true)
.map (containerInfo) ->
docker.getContainer(containerInfo.Id).remove(v: true, force: true)
.then ->
console.log('Deleted container:', containerInfo.Id, containerInfo.Image)
.catch(_.noop)
.then ->
console.log('Deleted container:', containerInfo.Id, containerInfo.Image)
.catch(_.noop)
.then ->
imagesToClean = _.reject images, (image) ->
_.some image.NormalizedRepoTags, (tag) ->
return _.includes(appTags, tag) or _.includes(supervisorTags, tag) or _.includes(extraTags, tag)
Promise.map imagesToClean, (image) ->
Promise.map image.RepoTags.concat(image.Id), (tag) ->
docker.getImage(tag).remove(force: true)
.then ->
console.log('Deleted image:', tag, image.Id, image.RepoTags)
.catch(_.noop)
imagesToClean = _.reject images, (image) ->
_.some(image.NormalizedRepoTags, (tag) -> _.includes(allProtectedTags, tag))
Promise.map imagesToClean, (image) ->
Promise.map image.RepoTags.concat(image.Id), (tag) ->
docker.getImage(tag).remove(force: true)
.then ->
console.log('Deleted image:', tag, image.Id, image.RepoTags)
.catch(_.noop)
)
containerHasExited = (id) ->
docker.getContainer(id).inspect()

View File

@ -80,19 +80,19 @@ exports.log = ->
do ->
_lock = new Lock()
_writeLock = Promise.promisify(_lock.async.writeLock)
loggerLock = (containerId) ->
_writeLock(containerId)
loggerLock = (containerName) ->
_writeLock(containerName)
.disposer (release) ->
release()
attached = {}
exports.attach = (app) ->
Promise.using loggerLock(app.containerId), ->
if !attached[app.containerId]
docker.getContainer(app.containerId)
Promise.using loggerLock(app.containerName), ->
if !attached[app.containerName]
docker.getContainer(app.containerName)
.logs({ follow: true, stdout: true, stderr: true, timestamps: true })
.then (stream) ->
attached[app.containerId] = true
attached[app.containerName] = true
stream.pipe(es.split())
.on 'data', (logLine) ->
space = logLine.indexOf(' ')
@ -101,6 +101,6 @@ do ->
publish(msg)
.on 'error', (err) ->
console.error('Error on container logs', err, err.stack)
attached[app.containerId] = false
attached[app.containerName] = false
.on 'end', ->
attached[app.containerId] = false
attached[app.containerName] = false