From bd34a19a798b06eb2ca133757b05c4f59a756d40 Mon Sep 17 00:00:00 2001 From: Pablo Carranza Velez Date: Tue, 17 Oct 2017 16:56:29 -0700 Subject: [PATCH 1/4] Use container name instead of id to identify apps, and avoid duplicated containers By storing the container name before creating the container, we avoid problems if the supervisor crashes or the device reboots between creating a container and storing its id. Change-Type: patch Signed-off-by: Pablo Carranza Velez --- src/api.coffee | 27 +++++--- src/application.coffee | 139 ++++++++++++++++++++++++++--------------- src/db.coffee | 3 +- src/lib/logger.coffee | 16 ++--- 4 files changed, 117 insertions(+), 68 deletions(-) diff --git a/src/api.coffee b/src/api.coffee index c0d0058e..ccad1e00 100644 --- a/src/api.coffee +++ b/src/api.coffee @@ -154,10 +154,12 @@ module.exports = (application) -> return res.status(400).send('Missing app id') Promise.using application.lockUpdates(appId, force), -> utils.getKnexApp(appId) - .tap (app) -> - application.kill(app, removeContainer: false) .then (app) -> - res.json(_.pick(app, 'containerId')) + application.getContainerId(app) + .then (containerId) -> + application.kill(app, removeContainer: false) + .then (app) -> + res.json({ containerId }) .catch utils.AppNotFoundError, (e) -> return res.status(400).send(e.message) .catch (err) -> @@ -172,8 +174,10 @@ module.exports = (application) -> utils.getKnexApp(appId) .tap (app) -> application.start(app) - .then (app) -> - res.json(_.pick(app, 'containerId')) + .then -> + application.getContainerId(app) + .then (containerId) -> + res.json({ containerId }) .catch utils.AppNotFoundError, (e) -> return res.status(400).send(e.message) .catch (err) -> @@ -185,13 +189,16 @@ module.exports = (application) -> if !appId? return res.status(400).send('Missing app id') Promise.using application.lockUpdates(appId, true), -> - columns = [ 'appId', 'containerId', 'commit', 'imageId', 'env' ] + columns = [ 'appId', 'containerName', 'commit', 'imageId', 'env' ] utils.getKnexApp(appId, columns) .then (app) -> - # Don't return keys on the endpoint - app.env = _.omit(JSON.parse(app.env), config.privateAppEnvVars) - # Don't return data that will be of no use to the user - res.json(app) + application.getContainerId(app) + .then (containerId) -> + # Don't return keys on the endpoint + app.env = _.omit(JSON.parse(app.env), config.privateAppEnvVars) + app.containerId = containerId + # Don't return data that will be of no use to the user + res.json(_.omit(app, [ 'containerName' ])) .catch utils.AppNotFoundError, (e) -> return res.status(400).send(e.message) .catch (err) -> diff --git a/src/application.coffee b/src/application.coffee index 6ac6d64e..83d29c2b 100644 --- a/src/application.coffee +++ b/src/application.coffee @@ -18,6 +18,7 @@ proxyvisor = require './proxyvisor' { checkInt, checkTruthy } = require './lib/validation' osRelease = require './lib/os-release' deviceConfig = require './device-config' +randomHexString = require './lib/random-hex-string' class UpdatesLockedError extends TypedError ImageNotFoundError = (err) -> @@ -155,7 +156,7 @@ logSpecialAction = (action, value, success) -> application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}) -> logSystemEvent(logTypes.stopApp, app) device.updateState(status: 'Stopping') - container = docker.getContainer(app.containerId) + container = docker.getContainer(app.containerName) container.stop(t: 10) .then -> container.remove(v: true) if removeContainer @@ -183,7 +184,7 @@ application.kill = kill = (app, 
{ updateDB = true, removeContainer = true } = {} .tap -> logSystemEvent(logTypes.stopAppSuccess, app) if removeContainer && updateDB - app.containerId = null + app.containerName = null knex('app').update(app).where(appId: app.appId) .catch (err) -> logSystemEvent(logTypes.stopAppError, app, err) @@ -255,6 +256,12 @@ isExecFormatError = (err) -> message = err.json.trim() /exec format error$/.test(message) +generateContainerName = (app) -> + randomHexString.generate() + .then (randomString) -> + sanitisedAppName = app.name.replace(/[^a-zA-Z0-9]+/g, '_') + return "#{sanitisedAppName}_#{randomString}" + application.start = start = (app) -> device.isResinOSv1().then (isV1) -> volumes = utils.defaultVolumes(isV1) @@ -270,9 +277,9 @@ application.start = start = (app) -> .map((port) -> port.trim()) .filter(isValidPort) - if app.containerId? + if app.containerName? # If we have a container id then check it exists and if so use it. - container = docker.getContainer(app.containerId) + container = docker.getContainer(app.containerName) containerPromise = container.inspect().return(container) else containerPromise = Promise.rejected() @@ -283,37 +290,44 @@ application.start = start = (app) -> .then (imageInfo) -> logSystemEvent(logTypes.installApp, app) device.updateState(status: 'Installing') + generateContainerName(app) + .tap (name) -> + app.containerName = name + knex('app').update(app).where(appId: app.appId) + .then (affectedRows) -> + knex('app').insert(app) if affectedRows == 0 + .then (name) -> + ports = {} + portBindings = {} + if portList? + portList.forEach (port) -> + ports[port + '/tcp'] = {} + portBindings[port + '/tcp'] = [ HostPort: port ] - ports = {} - portBindings = {} - if portList? - portList.forEach (port) -> - ports[port + '/tcp'] = {} - portBindings[port + '/tcp'] = [ HostPort: port ] + if imageInfo?.Config?.Cmd + cmd = imageInfo.Config.Cmd + else + cmd = [ '/bin/bash', '-c', '/start' ] - if imageInfo?.Config?.Cmd - cmd = imageInfo.Config.Cmd - else - cmd = [ '/bin/bash', '-c', '/start' ] - - restartPolicy = createRestartPolicy({ name: conf['RESIN_APP_RESTART_POLICY'], maximumRetryCount: conf['RESIN_APP_RESTART_RETRIES'] }) - shouldMountKmod(app.imageId) - .then (shouldMount) -> - binds.push('/bin/kmod:/bin/kmod:ro') if shouldMount - docker.createContainer( - Image: app.imageId - Cmd: cmd - Tty: true - Volumes: volumes - Env: _.map env, (v, k) -> k + '=' + v - ExposedPorts: ports - HostConfig: - Privileged: true - NetworkMode: 'host' - PortBindings: portBindings - Binds: binds - RestartPolicy: restartPolicy - ) + restartPolicy = createRestartPolicy({ name: conf['RESIN_APP_RESTART_POLICY'], maximumRetryCount: conf['RESIN_APP_RESTART_RETRIES'] }) + shouldMountKmod(app.imageId) + .then (shouldMount) -> + binds.push('/bin/kmod:/bin/kmod:ro') if shouldMount + docker.createContainer( + name: name + Image: app.imageId + Cmd: cmd + Tty: true + Volumes: volumes + Env: _.map env, (v, k) -> k + '=' + v + ExposedPorts: ports + HostConfig: + Privileged: true + NetworkMode: 'host' + PortBindings: portBindings + Binds: binds + RestartPolicy: restartPolicy + ) .tap -> logSystemEvent(logTypes.installAppSuccess, app) .catch (err) -> @@ -341,21 +355,20 @@ application.start = start = (app) -> .catch (err) -> # If starting the container failed, we remove it so that it doesn't litter container.remove(v: true) - .then -> - app.containerId = null - knex('app').update(app).where(appId: app.appId) .finally -> - logSystemEvent(logTypes.startAppError, app, err) throw err .then -> - app.containerId = 
container.id device.updateState(commit: app.commit) logger.attach(app) - .tap (container) -> - # Update the app info, only if starting the container worked. - knex('app').update(app).where(appId: app.appId) - .then (affectedRows) -> - knex('app').insert(app) if affectedRows == 0 + .catch (err) -> + # If creating or starting the app failed, we soft-delete it so that + # the next update cycle doesn't think the app is up to date + app.containerName = null + app.markedForDeletion = true + knex('app').update(app).where(appId: app.appId) + .finally -> + logSystemEvent(logTypes.startAppError, app, err) + throw err .tap -> if alreadyStarted logSystemEvent(logTypes.startAppNoop, app) @@ -765,7 +778,7 @@ application.update = update = (force, scheduled = false) -> app = localApps[appId] Promise.using lockUpdates(app, force), -> # We get the app from the DB again in case someone restarted it - # (which would have changed its containerId) + # (which would have changed its containerName) utils.getKnexApp(appId) .then(kill) .then -> @@ -848,6 +861,8 @@ application.update = update = (force, scheduled = false) -> device.updateState(status: 'Idle') return +sanitiseContainerName = (name) -> name.replace(/^\//, '') + listenToEvents = do -> appHasDied = {} return -> @@ -859,14 +874,14 @@ listenToEvents = do -> parser.on 'error', (err) -> console.error('Error on docker events JSON stream:', err, err.stack) parser.on 'data', (data) -> - if data?.Type? && data.Type == 'container' && data.status in ['die', 'start'] - knex('app').select().where({ containerId: data.id }) + if data?.Type? && data.Type == 'container' && data.status in ['die', 'start'] && data.Actor?.Attributes?.Name? + knex('app').select().where({ containerName: sanitiseContainerName(data.Actor.Attributes.Name) }) .then ([ app ]) -> if app? if data.status == 'die' logSystemEvent(logTypes.appExit, app) - appHasDied[app.containerId] = true - else if data.status == 'start' and appHasDied[app.containerId] + appHasDied[app.containerName] = true + else if data.status == 'start' and appHasDied[app.containerName] logSystemEvent(logTypes.appRestart, app) logger.attach(app) .catch (err) -> @@ -878,9 +893,35 @@ listenToEvents = do -> .catch (err) -> console.error('Error listening to events:', err, err.stack) +migrateContainerIdApps = -> + knex.schema.hasColumn('app', 'containerId') + .then (exists) -> + return if not exists + knex('app').whereNotNull('containerId').select() + .then (apps) -> + return if !apps? 
or apps.length == 0 + Promise.map apps, (app) -> + docker.getContainer(app.containerId).inspect() + .catchReturn({ Name: null }) + .then (container) -> + app.containerName = sanitiseContainerName(container.Name) + app.containerId = null + knex('app').update(app).where({ id: app.id }) + .then -> + knex.schema.table 'app', (table) -> + table.dropColumn('containerId') + +application.getContainerId = (app) -> + docker.getContainer(app.containerName).inspect() + .catchReturn({ Id: null }) + .then (container) -> + return container.Id + application.initialize = -> listenToEvents() - getAndApplyDeviceConfig() + migrateContainerIdApps() + .then -> + getAndApplyDeviceConfig() .then -> knex('app').whereNot(markedForDeletion: true).orWhereNull('markedForDeletion').select() .map (app) -> diff --git a/src/db.coffee b/src/db.coffee index ac611e41..342ac82e 100644 --- a/src/db.coffee +++ b/src/db.coffee @@ -40,7 +40,7 @@ knex.init = Promise.all([ knex.schema.createTable 'app', (t) -> t.increments('id').primary() t.string('name') - t.string('containerId') + t.string('containerName') t.string('commit') t.string('imageId') t.string('appId') @@ -52,6 +52,7 @@ knex.init = Promise.all([ Promise.all [ addColumn('app', 'commit', 'string') addColumn('app', 'appId', 'string') + addColumn('app', 'containerName', 'string') addColumn('app', 'config', 'json') addColumn('app', 'markedForDeletion', 'boolean') ] diff --git a/src/lib/logger.coffee b/src/lib/logger.coffee index e7e4b7cb..873b6141 100644 --- a/src/lib/logger.coffee +++ b/src/lib/logger.coffee @@ -80,19 +80,19 @@ exports.log = -> do -> _lock = new Lock() _writeLock = Promise.promisify(_lock.async.writeLock) - loggerLock = (containerId) -> - _writeLock(containerId) + loggerLock = (containerName) -> + _writeLock(containerName) .disposer (release) -> release() attached = {} exports.attach = (app) -> - Promise.using loggerLock(app.containerId), -> - if !attached[app.containerId] - docker.getContainer(app.containerId) + Promise.using loggerLock(app.containerName), -> + if !attached[app.containerName] + docker.getContainer(app.containerName) .logs({ follow: true, stdout: true, stderr: true, timestamps: true }) .then (stream) -> - attached[app.containerId] = true + attached[app.containerName] = true stream.pipe(es.split()) .on 'data', (logLine) -> space = logLine.indexOf(' ') @@ -101,6 +101,6 @@ do -> publish(msg) .on 'error', (err) -> console.error('Error on container logs', err, err.stack) - attached[app.containerId] = false + attached[app.containerName] = false .on 'end', -> - attached[app.containerId] = false + attached[app.containerName] = false From 0bc23df8c9d8238e2df183ad94294b3fa5a8dac6 Mon Sep 17 00:00:00 2001 From: Pablo Carranza Velez Date: Tue, 17 Oct 2017 17:53:56 -0700 Subject: [PATCH 2/4] Refactor container cleanup to remove all spurious containers We change the way container cleanup works so that it compares running app containers with the container names for the known apps. This allows the cleanup to effectively delete any spurious/duplicated app containers. 
Change-Type: patch Signed-off-by: Pablo Carranza Velez --- src/docker-utils.coffee | 80 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/src/docker-utils.coffee b/src/docker-utils.coffee index 3b001d9e..17e7bcdc 100644 --- a/src/docker-utils.coffee +++ b/src/docker-utils.coffee @@ -138,62 +138,60 @@ do -> Promise.using writeLockImages(), -> Promise.join( knex('app').select() - .map ({ imageId }) -> - normalizeRepoTag(imageId) + .map (app) -> + app.imageId = normalizeRepoTag(app.imageId) + return Promise.props(app) knex('dependentApp').select().whereNotNull('imageId') .map ({ imageId }) -> - normalizeRepoTag(imageId) + return normalizeRepoTag(imageId) supervisorTagPromise docker.listImages() .map (image) -> image.NormalizedRepoTags = Promise.map(image.RepoTags, normalizeRepoTag) - Promise.props(image) + return Promise.props(image) Promise.map(extraImagesToIgnore, normalizeRepoTag) (apps, dependentApps, supervisorTag, images, normalizedExtraImages) -> + appNames = _.map(apps, 'containerName') + appImages = _.map(apps, 'imageId') imageTags = _.map(images, 'NormalizedRepoTags') - supervisorTags = _.filter imageTags, (tags) -> - _.includes(tags, supervisorTag) appTags = _.filter imageTags, (tags) -> _.some tags, (tag) -> - _.includes(apps, tag) or _.includes(dependentApps, tag) - extraTags = _.filter imageTags, (tags) -> - _.some tags, (tag) -> - _.includes(normalizedExtraImages, tag) - supervisorTags = _.flatten(supervisorTags) + _.includes(appImages, tag) or _.includes(dependentApps, tag) appTags = _.flatten(appTags) + supervisorTags = _.filter imageTags, (tags) -> + _.includes(tags, supervisorTag) + supervisorTags = _.flatten(supervisorTags) + extraTags = _.filter imageTags, (tags) -> + _.some(tags, (tag) -> _.includes(normalizedExtraImages, tag)) extraTags = _.flatten(extraTags) - - return { images, supervisorTags, appTags, extraTags } - ) - .then ({ images, supervisorTags, appTags, extraTags }) -> - # Cleanup containers first, so that they don't block image removal. - docker.listContainers(all: true) - .filter (containerInfo) -> - # Do not remove user apps. - normalizeRepoTag(containerInfo.Image) - .then (repoTag) -> - if _.includes(appTags, repoTag) - return false - if _.includes(extraTags, repoTag) - return false - if !_.includes(supervisorTags, repoTag) + allProtectedTags = _.union(appTags, supervisorTags, extraTags) + # Cleanup containers first, so that they don't block image removal. + docker.listContainers(all: true) + .filter (containerInfo) -> + # Do not remove user apps. 
+ normalizeRepoTag(containerInfo.Image) + .then (repoTag) -> + if _.includes(appTags, repoTag) + return !_.some containerInfo.Names, (name) -> + _.some appNames, (appContainerName) -> "/#{appContainerName}" == name + if _.includes(supervisorTags, repoTag) + return containerHasExited(containerInfo.Id) return true - return containerHasExited(containerInfo.Id) - .map (containerInfo) -> - docker.getContainer(containerInfo.Id).remove(v: true, force: true) + .map (containerInfo) -> + docker.getContainer(containerInfo.Id).remove(v: true, force: true) + .then -> + console.log('Deleted container:', containerInfo.Id, containerInfo.Image) + .catch(_.noop) .then -> - console.log('Deleted container:', containerInfo.Id, containerInfo.Image) - .catch(_.noop) - .then -> - imagesToClean = _.reject images, (image) -> - _.some image.NormalizedRepoTags, (tag) -> - return _.includes(appTags, tag) or _.includes(supervisorTags, tag) or _.includes(extraTags, tag) - Promise.map imagesToClean, (image) -> - Promise.map image.RepoTags.concat(image.Id), (tag) -> - docker.getImage(tag).remove(force: true) - .then -> - console.log('Deleted image:', tag, image.Id, image.RepoTags) - .catch(_.noop) + imagesToClean = _.reject images, (image) -> + _.some(image.NormalizedRepoTags, (tag) -> _.includes(allProtectedTags, tag)) + Promise.map imagesToClean, (image) -> + Promise.map image.RepoTags.concat(image.Id), (tag) -> + docker.getImage(tag).remove(force: true) + .then -> + console.log('Deleted image:', tag, image.Id, image.RepoTags) + .catch(_.noop) + ) containerHasExited = (id) -> docker.getContainer(id).inspect() From ecf7e4206c47627a28896abf083b9362c04fe619 Mon Sep 17 00:00:00 2001 From: Pablo Carranza Velez Date: Fri, 27 Oct 2017 18:25:43 -0700 Subject: [PATCH 3/4] Avoid fetching an image when it might be available or when starting an app because it might not be necessary This change removes the behavior where we would try to fetch an app image when starting the app. This might cause an unintended download of an app that is not really needed anymore because we're starting the app on boot and an update cycle would make this image unnecessary. So now we try to inspect the image, and if this fails we will throw an error, causing the app to be soft-deleted and the next update cycle to properly trigger a download of whatever image we need from the target state. We also improve the error catching when fetching an image, to specifically catch an "image not found" error before trying to download - otherwise, any other random error will cause us to try to download the image again, which will not be a noop if we're using deltas. If there's any other error, the correct behavior is to throw and retry later. Change-Type: patch Signed-off-by: Pablo Carranza Velez --- src/application.coffee | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/application.coffee b/src/application.coffee index 83d29c2b..24f587c0 100644 --- a/src/application.coffee +++ b/src/application.coffee @@ -210,7 +210,7 @@ fetch = (app, { deltaSource, setDeviceUpdateState = true } = {}) -> device.updateState(download_progress: progress.percentage) docker.getImage(app.imageId).inspect() - .catch (error) -> + .catch ImageNotFoundError, -> device.updateState(status: 'Downloading', download_progress: 0) Promise.try -> @@ -286,7 +286,8 @@ application.start = start = (app) -> # If there is no existing container then create one instead. 
containerPromise.catch -> - fetch(app) + # If the image is not available, we'll let the update cycle fix it + docker.getImage(app.imageId).inspect() .then (imageInfo) -> logSystemEvent(logTypes.installApp, app) device.updateState(status: 'Installing') From eb6c4fb7c49e05c681964b9558c3b27e48aef0ba Mon Sep 17 00:00:00 2001 From: "resin-io-versionbot[bot]" Date: Mon, 30 Oct 2017 23:23:50 +0000 Subject: [PATCH 4/4] v6.3.9 --- CHANGELOG.md | 6 ++++++ package.json | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a236496..6e23d4cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file automatically by Versionist. DO NOT EDIT THIS FILE MANUALLY! This project adheres to [Semantic Versioning](http://semver.org/). +## v6.3.9 - 2017-10-30 + +* Avoid fetching an image when it might be available or when starting an app because it might not be necessary #507 [Pablo Carranza Velez] +* Refactor container cleanup to remove all spurious containers #507 [Pablo Carranza Velez] +* Use container name instead of id to identify apps, and avoid duplicated containers #507 [Pablo Carranza Velez] + ## v6.3.8 - 2017-10-30 * If a device is already provisioned but the key exchange fails, retry it until it succeeds #513 [Pablo Carranza Velez] diff --git a/package.json b/package.json index b3a1c034..c3de2db9 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "resin-supervisor", "description": "This is resin.io's Supervisor, a program that runs on IoT devices and has the task of running user Apps (which are Docker containers), and updating them as Resin's API informs it to.", - "version": "6.3.8", + "version": "6.3.9", "license": "Apache-2.0", "repository": { "type": "git",
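
As an illustration of the ordering the first patch relies on, below is a minimal sketch, not the supervisor's actual module layout: the names `docker`, `knex`, `startApp`, and the `app` row shape are stand-ins. The point is that the container name is generated and written to the database before docker.createContainer is called, so a supervisor crash or a device reboot between the two steps leaves behind a name that either resolves to a real container or to nothing, never an untracked container id.

    # Sketch only: assumes a dockerode `docker` instance and a knex handle,
    # mirroring the pattern introduced in patch 1/4.
    crypto = require 'crypto'
    Promise = require 'bluebird'

    generateContainerName = (app) ->
      sanitised = app.name.replace(/[^a-zA-Z0-9]+/g, '_')
      random = crypto.randomBytes(8).toString('hex')
      "#{sanitised}_#{random}"

    startApp = (docker, knex, app) ->
      Promise.try ->
        app.containerName = generateContainerName(app)
        # Persist the name *before* the container exists, so a crash or reboot
        # between this write and createContainer cannot leave a container that
        # the database knows nothing about.
        knex('app').update(containerName: app.containerName).where(appId: app.appId)
      .then ->
        # dockerode passes `name` through to Docker's container-create endpoint.
        docker.createContainer(name: app.containerName, Image: app.imageId, Tty: true)
      .then (container) ->
        container.start()

    # The container id itself is never stored; as in application.getContainerId,
    # it can be recovered from the name whenever it is needed.
    getContainerId = (docker, app) ->
      docker.getContainer(app.containerName).inspect()
      .then (container) -> container.Id

Looking the id up by name on demand is what lets the stop, start, and app-info API endpoints in patch 1/4 keep returning a containerId field to callers even though only the name is persisted in the database.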