diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a236496..6e23d4cb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file
 automatically by Versionist. DO NOT EDIT THIS FILE MANUALLY!
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+## v6.3.9 - 2017-10-30
+
+* Avoid fetching an image when it might be available or when starting an app because it might not be necessary #507 [Pablo Carranza Velez]
+* Refactor container cleanup to remove all spurious containers #507 [Pablo Carranza Velez]
+* Use container name instead of id to identify apps, and avoid duplicated containers #507 [Pablo Carranza Velez]
+
 ## v6.3.8 - 2017-10-30
 
 * If a device is already provisioned but the key exchange fails, retry it until it succeeds #513 [Pablo Carranza Velez]
diff --git a/package.json b/package.json
index b3a1c034..c3de2db9 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "resin-supervisor",
   "description": "This is resin.io's Supervisor, a program that runs on IoT devices and has the task of running user Apps (which are Docker containers), and updating them as Resin's API informs it to.",
-  "version": "6.3.8",
+  "version": "6.3.9",
   "license": "Apache-2.0",
   "repository": {
     "type": "git",
diff --git a/src/api.coffee b/src/api.coffee
index c0d0058e..ccad1e00 100644
--- a/src/api.coffee
+++ b/src/api.coffee
@@ -154,10 +154,12 @@ module.exports = (application) ->
 			return res.status(400).send('Missing app id')
 		Promise.using application.lockUpdates(appId, force), ->
 			utils.getKnexApp(appId)
-			.tap (app) ->
-				application.kill(app, removeContainer: false)
 			.then (app) ->
-				res.json(_.pick(app, 'containerId'))
+				application.getContainerId(app)
+				.then (containerId) ->
+					application.kill(app, removeContainer: false)
+					.then (app) ->
+						res.json({ containerId })
 		.catch utils.AppNotFoundError, (e) ->
 			return res.status(400).send(e.message)
 		.catch (err) ->
@@ -172,8 +174,10 @@ module.exports = (application) ->
 			utils.getKnexApp(appId)
 			.tap (app) ->
 				application.start(app)
-			.then (app) ->
-				res.json(_.pick(app, 'containerId'))
+				.then ->
+					application.getContainerId(app)
+					.then (containerId) ->
+						res.json({ containerId })
 		.catch utils.AppNotFoundError, (e) ->
 			return res.status(400).send(e.message)
 		.catch (err) ->
@@ -185,13 +189,16 @@ module.exports = (application) ->
 		if !appId?
 			return res.status(400).send('Missing app id')
 		Promise.using application.lockUpdates(appId, true), ->
-			columns = [ 'appId', 'containerId', 'commit', 'imageId', 'env' ]
+			columns = [ 'appId', 'containerName', 'commit', 'imageId', 'env' ]
 			utils.getKnexApp(appId, columns)
 			.then (app) ->
-				# Don't return keys on the endpoint
-				app.env = _.omit(JSON.parse(app.env), config.privateAppEnvVars)
-				# Don't return data that will be of no use to the user
-				res.json(app)
+				application.getContainerId(app)
+				.then (containerId) ->
+					# Don't return keys on the endpoint
+					app.env = _.omit(JSON.parse(app.env), config.privateAppEnvVars)
+					app.containerId = containerId
+					# Don't return data that will be of no use to the user
+					res.json(_.omit(app, [ 'containerName' ]))
 		.catch utils.AppNotFoundError, (e) ->
 			return res.status(400).send(e.message)
 		.catch (err) ->
diff --git a/src/application.coffee b/src/application.coffee
index 6ac6d64e..24f587c0 100644
--- a/src/application.coffee
+++ b/src/application.coffee
@@ -18,6 +18,7 @@ proxyvisor = require './proxyvisor'
 { checkInt, checkTruthy } = require './lib/validation'
 osRelease = require './lib/os-release'
 deviceConfig = require './device-config'
+randomHexString = require './lib/random-hex-string'
 
 class UpdatesLockedError extends TypedError
 ImageNotFoundError = (err) ->
@@ -155,7 +156,7 @@ logSpecialAction = (action, value, success) ->
 application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}) ->
 	logSystemEvent(logTypes.stopApp, app)
 	device.updateState(status: 'Stopping')
-	container = docker.getContainer(app.containerId)
+	container = docker.getContainer(app.containerName)
 	container.stop(t: 10)
 	.then ->
 		container.remove(v: true) if removeContainer
@@ -183,7 +184,7 @@ application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}
 	.tap ->
 		logSystemEvent(logTypes.stopAppSuccess, app)
 		if removeContainer && updateDB
-			app.containerId = null
+			app.containerName = null
 			knex('app').update(app).where(appId: app.appId)
 	.catch (err) ->
 		logSystemEvent(logTypes.stopAppError, app, err)
@@ -209,7 +210,7 @@ fetch = (app, { deltaSource, setDeviceUpdateState = true } = {}) ->
 		device.updateState(download_progress: progress.percentage)
 
 	docker.getImage(app.imageId).inspect()
-	.catch (error) ->
+	.catch ImageNotFoundError, ->
 		device.updateState(status: 'Downloading', download_progress: 0)
 
 		Promise.try ->
@@ -255,6 +256,12 @@ isExecFormatError = (err) ->
 	message = err.json.trim()
 	/exec format error$/.test(message)
 
+generateContainerName = (app) ->
+	randomHexString.generate()
+	.then (randomString) ->
+		sanitisedAppName = app.name.replace(/[^a-zA-Z0-9]+/g, '_')
+		return "#{sanitisedAppName}_#{randomString}"
+
 application.start = start = (app) ->
 	device.isResinOSv1().then (isV1) ->
 		volumes = utils.defaultVolumes(isV1)
@@ -270,50 +277,58 @@ application.start = start = (app) ->
 				.map((port) -> port.trim())
 				.filter(isValidPort)
 
-			if app.containerId?
+			if app.containerName?
 				# If we have a container id then check it exists and if so use it.
-				container = docker.getContainer(app.containerId)
+				container = docker.getContainer(app.containerName)
 				containerPromise = container.inspect().return(container)
 			else
 				containerPromise = Promise.rejected()
 
 			# If there is no existing container then create one instead.
 			containerPromise.catch ->
-				fetch(app)
+				# If the image is not available, we'll let the update cycle fix it
+				docker.getImage(app.imageId).inspect()
 				.then (imageInfo) ->
 					logSystemEvent(logTypes.installApp, app)
 					device.updateState(status: 'Installing')
 
+					generateContainerName(app)
+					.tap (name) ->
+						app.containerName = name
+						knex('app').update(app).where(appId: app.appId)
+						.then (affectedRows) ->
+							knex('app').insert(app) if affectedRows == 0
+					.then (name) ->
+						ports = {}
+						portBindings = {}
+						if portList?
+							portList.forEach (port) ->
+								ports[port + '/tcp'] = {}
+								portBindings[port + '/tcp'] = [ HostPort: port ]
-					ports = {}
-					portBindings = {}
-					if portList?
-						portList.forEach (port) ->
-							ports[port + '/tcp'] = {}
-							portBindings[port + '/tcp'] = [ HostPort: port ]
+						if imageInfo?.Config?.Cmd
+							cmd = imageInfo.Config.Cmd
+						else
+							cmd = [ '/bin/bash', '-c', '/start' ]
-					if imageInfo?.Config?.Cmd
-						cmd = imageInfo.Config.Cmd
-					else
-						cmd = [ '/bin/bash', '-c', '/start' ]
-
-					restartPolicy = createRestartPolicy({ name: conf['RESIN_APP_RESTART_POLICY'], maximumRetryCount: conf['RESIN_APP_RESTART_RETRIES'] })
-					shouldMountKmod(app.imageId)
-					.then (shouldMount) ->
-						binds.push('/bin/kmod:/bin/kmod:ro') if shouldMount
-						docker.createContainer(
-							Image: app.imageId
-							Cmd: cmd
-							Tty: true
-							Volumes: volumes
-							Env: _.map env, (v, k) -> k + '=' + v
-							ExposedPorts: ports
-							HostConfig:
-								Privileged: true
-								NetworkMode: 'host'
-								PortBindings: portBindings
-								Binds: binds
-								RestartPolicy: restartPolicy
-						)
+						restartPolicy = createRestartPolicy({ name: conf['RESIN_APP_RESTART_POLICY'], maximumRetryCount: conf['RESIN_APP_RESTART_RETRIES'] })
+						shouldMountKmod(app.imageId)
+						.then (shouldMount) ->
+							binds.push('/bin/kmod:/bin/kmod:ro') if shouldMount
+							docker.createContainer(
+								name: name
+								Image: app.imageId
+								Cmd: cmd
+								Tty: true
+								Volumes: volumes
+								Env: _.map env, (v, k) -> k + '=' + v
+								ExposedPorts: ports
+								HostConfig:
+									Privileged: true
+									NetworkMode: 'host'
+									PortBindings: portBindings
+									Binds: binds
+									RestartPolicy: restartPolicy
+							)
 				.tap ->
 					logSystemEvent(logTypes.installAppSuccess, app)
 			.catch (err) ->
@@ -341,21 +356,20 @@ application.start = start = (app) ->
 				.catch (err) ->
 					# If starting the container failed, we remove it so that it doesn't litter
 					container.remove(v: true)
-					.then ->
-						app.containerId = null
-						knex('app').update(app).where(appId: app.appId)
 					.finally ->
-						logSystemEvent(logTypes.startAppError, app, err)
 						throw err
 				.then ->
-					app.containerId = container.id
 					device.updateState(commit: app.commit)
 					logger.attach(app)
-			.tap (container) ->
-				# Update the app info, only if starting the container worked.
-				knex('app').update(app).where(appId: app.appId)
-				.then (affectedRows) ->
-					knex('app').insert(app) if affectedRows == 0
+			.catch (err) ->
+				# If creating or starting the app failed, we soft-delete it so that
+				# the next update cycle doesn't think the app is up to date
+				app.containerName = null
+				app.markedForDeletion = true
+				knex('app').update(app).where(appId: app.appId)
+				.finally ->
+					logSystemEvent(logTypes.startAppError, app, err)
+					throw err
 			.tap ->
 				if alreadyStarted
 					logSystemEvent(logTypes.startAppNoop, app)
@@ -765,7 +779,7 @@ application.update = update = (force, scheduled = false) ->
 				app = localApps[appId]
 				Promise.using lockUpdates(app, force), ->
 					# We get the app from the DB again in case someone restarted it
-					# (which would have changed its containerId)
+					# (which would have changed its containerName)
 					utils.getKnexApp(appId)
 					.then(kill)
 					.then ->
@@ -848,6 +862,8 @@ application.update = update = (force, scheduled = false) ->
 		device.updateState(status: 'Idle')
 		return
 
+sanitiseContainerName = (name) -> name.replace(/^\//, '')
+
 listenToEvents = do ->
 	appHasDied = {}
 	return ->
@@ -859,14 +875,14 @@ listenToEvents = do ->
 			parser.on 'error', (err) ->
 				console.error('Error on docker events JSON stream:', err, err.stack)
 			parser.on 'data', (data) ->
-				if data?.Type? && data.Type == 'container' && data.status in ['die', 'start']
-					knex('app').select().where({ containerId: data.id })
+				if data?.Type? && data.Type == 'container' && data.status in ['die', 'start'] && data.Actor?.Attributes?.Name?
+					knex('app').select().where({ containerName: sanitiseContainerName(data.Actor.Attributes.Name) })
 					.then ([ app ]) ->
 						if app?
 							if data.status == 'die'
 								logSystemEvent(logTypes.appExit, app)
-								appHasDied[app.containerId] = true
-							else if data.status == 'start' and appHasDied[app.containerId]
+								appHasDied[app.containerName] = true
+							else if data.status == 'start' and appHasDied[app.containerName]
 								logSystemEvent(logTypes.appRestart, app)
 								logger.attach(app)
 					.catch (err) ->
@@ -878,9 +894,35 @@ listenToEvents = do ->
 		.catch (err) ->
 			console.error('Error listening to events:', err, err.stack)
 
+migrateContainerIdApps = ->
+	knex.schema.hasColumn('app', 'containerId')
+	.then (exists) ->
+		return if not exists
+		knex('app').whereNotNull('containerId').select()
+		.then (apps) ->
+			return if !apps? or apps.length == 0
+			Promise.map apps, (app) ->
+				docker.getContainer(app.containerId).inspect()
+				.catchReturn({ Name: null })
+				.then (container) ->
+					app.containerName = sanitiseContainerName(container.Name)
+					app.containerId = null
+					knex('app').update(app).where({ id: app.id })
+		.then ->
+			knex.schema.table 'app', (table) ->
+				table.dropColumn('containerId')
+
+application.getContainerId = (app) ->
+	docker.getContainer(app.containerName).inspect()
+	.catchReturn({ Id: null })
+	.then (container) ->
+		return container.Id
+
 application.initialize = ->
 	listenToEvents()
-	getAndApplyDeviceConfig()
+	migrateContainerIdApps()
+	.then ->
+		getAndApplyDeviceConfig()
 	.then ->
 		knex('app').whereNot(markedForDeletion: true).orWhereNull('markedForDeletion').select()
 		.map (app) ->
diff --git a/src/db.coffee b/src/db.coffee
index ac611e41..342ac82e 100644
--- a/src/db.coffee
+++ b/src/db.coffee
@@ -40,7 +40,7 @@ knex.init = Promise.all([
 		knex.schema.createTable 'app', (t) ->
 			t.increments('id').primary()
 			t.string('name')
-			t.string('containerId')
+			t.string('containerName')
 			t.string('commit')
 			t.string('imageId')
 			t.string('appId')
@@ -52,6 +52,7 @@ knex.init = Promise.all([
 			Promise.all [
 				addColumn('app', 'commit', 'string')
 				addColumn('app', 'appId', 'string')
+				addColumn('app', 'containerName', 'string')
 				addColumn('app', 'config', 'json')
 				addColumn('app', 'markedForDeletion', 'boolean')
 			]
diff --git a/src/docker-utils.coffee b/src/docker-utils.coffee
index 3b001d9e..17e7bcdc 100644
--- a/src/docker-utils.coffee
+++ b/src/docker-utils.coffee
@@ -138,62 +138,60 @@ do ->
 		Promise.using writeLockImages(), ->
 			Promise.join(
 				knex('app').select()
-				.map ({ imageId }) ->
-					normalizeRepoTag(imageId)
+				.map (app) ->
+					app.imageId = normalizeRepoTag(app.imageId)
+					return Promise.props(app)
 				knex('dependentApp').select().whereNotNull('imageId')
 				.map ({ imageId }) ->
-					normalizeRepoTag(imageId)
+					return normalizeRepoTag(imageId)
 				supervisorTagPromise
 				docker.listImages()
 				.map (image) ->
 					image.NormalizedRepoTags = Promise.map(image.RepoTags, normalizeRepoTag)
-					Promise.props(image)
+					return Promise.props(image)
 				Promise.map(extraImagesToIgnore, normalizeRepoTag)
 				(apps, dependentApps, supervisorTag, images, normalizedExtraImages) ->
+					appNames = _.map(apps, 'containerName')
+					appImages = _.map(apps, 'imageId')
 					imageTags = _.map(images, 'NormalizedRepoTags')
-					supervisorTags = _.filter imageTags, (tags) ->
-						_.includes(tags, supervisorTag)
 					appTags = _.filter imageTags, (tags) ->
 						_.some tags, (tag) ->
-							_.includes(apps, tag) or _.includes(dependentApps, tag)
-					extraTags = _.filter imageTags, (tags) ->
-						_.some tags, (tag) ->
-							_.includes(normalizedExtraImages, tag)
-					supervisorTags = _.flatten(supervisorTags)
+							_.includes(appImages, tag) or _.includes(dependentApps, tag)
 					appTags = _.flatten(appTags)
+					supervisorTags = _.filter imageTags, (tags) ->
+						_.includes(tags, supervisorTag)
+					supervisorTags = _.flatten(supervisorTags)
+					extraTags = _.filter imageTags, (tags) ->
+						_.some(tags, (tag) -> _.includes(normalizedExtraImages, tag))
 					extraTags = _.flatten(extraTags)
-
-					return { images, supervisorTags, appTags, extraTags }
-			)
-			.then ({ images, supervisorTags, appTags, extraTags }) ->
-				# Cleanup containers first, so that they don't block image removal.
-				docker.listContainers(all: true)
-				.filter (containerInfo) ->
-					# Do not remove user apps.
-					normalizeRepoTag(containerInfo.Image)
-					.then (repoTag) ->
-						if _.includes(appTags, repoTag)
-							return false
-						if _.includes(extraTags, repoTag)
-							return false
-						if !_.includes(supervisorTags, repoTag)
+					allProtectedTags = _.union(appTags, supervisorTags, extraTags)
+					# Cleanup containers first, so that they don't block image removal.
+					docker.listContainers(all: true)
+					.filter (containerInfo) ->
+						# Do not remove user apps.
+						normalizeRepoTag(containerInfo.Image)
+						.then (repoTag) ->
+							if _.includes(appTags, repoTag)
+								return !_.some containerInfo.Names, (name) ->
+									_.some appNames, (appContainerName) -> "/#{appContainerName}" == name
+							if _.includes(supervisorTags, repoTag)
+								return containerHasExited(containerInfo.Id)
 							return true
-						return containerHasExited(containerInfo.Id)
-				.map (containerInfo) ->
-					docker.getContainer(containerInfo.Id).remove(v: true, force: true)
+					.map (containerInfo) ->
+						docker.getContainer(containerInfo.Id).remove(v: true, force: true)
+						.then ->
+							console.log('Deleted container:', containerInfo.Id, containerInfo.Image)
+						.catch(_.noop)
 					.then ->
-						console.log('Deleted container:', containerInfo.Id, containerInfo.Image)
-					.catch(_.noop)
-				.then ->
-					imagesToClean = _.reject images, (image) ->
-						_.some image.NormalizedRepoTags, (tag) ->
-							return _.includes(appTags, tag) or _.includes(supervisorTags, tag) or _.includes(extraTags, tag)
-					Promise.map imagesToClean, (image) ->
-						Promise.map image.RepoTags.concat(image.Id), (tag) ->
-							docker.getImage(tag).remove(force: true)
-							.then ->
-								console.log('Deleted image:', tag, image.Id, image.RepoTags)
-							.catch(_.noop)
+						imagesToClean = _.reject images, (image) ->
+							_.some(image.NormalizedRepoTags, (tag) -> _.includes(allProtectedTags, tag))
+						Promise.map imagesToClean, (image) ->
+							Promise.map image.RepoTags.concat(image.Id), (tag) ->
+								docker.getImage(tag).remove(force: true)
+								.then ->
+									console.log('Deleted image:', tag, image.Id, image.RepoTags)
+								.catch(_.noop)
+			)
 
 	containerHasExited = (id) ->
 		docker.getContainer(id).inspect()
diff --git a/src/lib/logger.coffee b/src/lib/logger.coffee
index e7e4b7cb..873b6141 100644
--- a/src/lib/logger.coffee
+++ b/src/lib/logger.coffee
@@ -80,19 +80,19 @@ exports.log = ->
 do ->
 	_lock = new Lock()
 	_writeLock = Promise.promisify(_lock.async.writeLock)
-	loggerLock = (containerId) ->
-		_writeLock(containerId)
+	loggerLock = (containerName) ->
+		_writeLock(containerName)
 		.disposer (release) ->
 			release()
 
 	attached = {}
 	exports.attach = (app) ->
-		Promise.using loggerLock(app.containerId), ->
-			if !attached[app.containerId]
-				docker.getContainer(app.containerId)
+		Promise.using loggerLock(app.containerName), ->
+			if !attached[app.containerName]
+				docker.getContainer(app.containerName)
 				.logs({ follow: true, stdout: true, stderr: true, timestamps: true })
 				.then (stream) ->
-					attached[app.containerId] = true
+					attached[app.containerName] = true
 					stream.pipe(es.split())
 					.on 'data', (logLine) ->
 						space = logLine.indexOf(' ')
@@ -101,6 +101,6 @@ do ->
 						publish(msg)
 					.on 'error', (err) ->
 						console.error('Error on container logs', err, err.stack)
-						attached[app.containerId] = false
+						attached[app.containerName] = false
 					.on 'end', ->
-						attached[app.containerId] = false
+						attached[app.containerName] = false