Issue #20: Change the update lock to a temporary filesystem

The lock is now located at `/tmp/resin-supervisor/<appId>/` on the host, and at `/tmp/resin/`
in the user container. The old lock location is supported only on Resin OS 1.X (and both locks are
taken in that case).

This fixes the race condition in which the app is started before the supervisor and takes a lock that is
then cleared on supervisor startup.

Change-Type: major
Signed-off-by: Pablo Carranza Velez <pablo@resin.io>
This commit is contained in:
Pablo Carranza Velez 2017-03-07 13:09:06 -03:00 committed by Pablo Carranza Vélez
parent 208b799c4b
commit 27690e0c30
4 changed files with 59 additions and 19 deletions

View File

@ -2,13 +2,27 @@
Locking updates means that the Supervisor will not be able to kill your application. This is meant to be used at critical sections of your code where you don't want to be interrupted, or to control that updates are only installed at certain times. Locking updates means that the Supervisor will not be able to kill your application. This is meant to be used at critical sections of your code where you don't want to be interrupted, or to control that updates are only installed at certain times.
In order to do this, users can create a file called `/data/resin-updates.lock` that will prevent the Supervisor from killing and restarting the app. As any other lockfile, the Supervisor itself will create such file before killing the app, so you should only create it in exclusive mode. This means: only create the lockfile if it doesn't already exist. Several tools exist to simplify this process, for example [npm/lockfile](https://github.com/npm/lockfile). In order to do this, users can create a file called `resin-updates.lock` that will prevent the Supervisor from killing and restarting the app. As any other lockfile, the Supervisor itself will create such file before killing the app, so you should only create it in exclusive mode. This means: only create the lockfile if it doesn't already exist. Several tools exist to simplify this process, for example [npm/lockfile](https://github.com/npm/lockfile).
### Location of the lockfile
In supervisor v4.0.0 and higher, the lock is located at `/tmp/resin/resin-updates.lock`. This lock is cleared automatically when the device reboots, so the user app must take it every time it starts up.
Older supervisors have the lock at `/data/resin-updates.lock`. This lock is still supported on devices running Resin OS 1.X. In this case, newer supervisors will try to take *both* locks before killing the application.
The old lock has the problem that the supervisor has to clear it whenever it starts up to avoid deadlocks. If the user app
has taken the lock before the supervisor starts up, the lock will be cleared and the app will then operate under the false
assumption that updates are locked (see [issue #20](https://github.com/resin-io/resin-supervisor/issues/20)). We therefore strongly recommend switching to the new lock location as soon as possible.
In supervisors >= v4.0.0 and any OS that is not Resin OS 1.X, the old lock location is completely ignored.
### Taking the lock
Using the above-mentioned library, the lock can be acquired like in this CoffeeScript example: Using the above-mentioned library, the lock can be acquired like in this CoffeeScript example:
```coffeescript ```coffeescript
lockFile = require 'lockfile' lockFile = require 'lockfile'
lockFile.lock '/data/resin-updates.lock', (err) -> lockFile.lock '/tmp/resin/resin-updates.lock', (err) ->
# A non-null err probably means the supervisor is about to kill us # A non-null err probably means the supervisor is about to kill us
throw new Error('Could not acquire lock: ', err) if err? throw new Error('Could not acquire lock: ', err) if err?
@ -16,7 +30,7 @@ Using the above-mentioned library, the lock can be acquired like in this CoffeeS
doTheHarlemShake() doTheHarlemShake()
# Now we release the lock, and we can be killed again # Now we release the lock, and we can be killed again
lockFile.unlock '/data/resin-updates.lock', (err) -> lockFile.unlock '/tmp/resin/resin-updates.lock', (err) ->
# If err is not null here, something went really wrong # If err is not null here, something went really wrong
throw err if err? throw err if err?
``` ```

View File

@ -6,6 +6,8 @@ set -o errexit
mkdir -p /dev/net mkdir -p /dev/net
[ -c /dev/net/tun ] || [ -c /dev/net/tun ] ||
mknod /dev/net/tun c 10 200 mknod /dev/net/tun c 10 200
# Make sure the parent directory for the per-app temporary update locks exists.
# NOTE(review): /mnt/root is presumably the host root as seen from the supervisor, so this is
# /tmp/resin-supervisor on the host — confirm against how the supervisor container is started.
[ -d /mnt/root/tmp/resin-supervisor ] ||
mkdir -p /mnt/root/tmp/resin-supervisor
mount -t tmpfs -o size=1m tmpfs /var/run/resin mount -t tmpfs -o size=1m tmpfs /var/run/resin

View File

@ -175,7 +175,11 @@ application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}
return return
throw err throw err
.tap -> .tap ->
lockFile.unlockAsync(lockPath(app)) lockFile.unlockAsync(tmpLockPath(app))
.tap ->
device.isResinOSv1()
.then (isV1) ->
lockFile.unlockAsync(persistentLockPath(app)) if isV1
.tap -> .tap ->
logSystemEvent(logTypes.stopAppSuccess, app) logSystemEvent(logTypes.stopAppSuccess, app)
if removeContainer && updateDB if removeContainer && updateDB
@ -364,17 +368,21 @@ createRestartPolicy = ({ name, maximumRetryCount }) ->
policy.MaximumRetryCount = maximumRetryCount policy.MaximumRetryCount = maximumRetryCount
return policy return policy
lockPath = (app) -> persistentLockPath = (app) ->
appId = app.appId ? app appId = app.appId ? app
return "/mnt/root#{config.dataPath}/#{appId}/resin-updates.lock" return "/mnt/root#{config.dataPath}/#{appId}/resin-updates.lock"
# Builds the path of the temporary update lockfile for an application.
# `app` may be an app object (its `appId` property is used) or a bare appId value.
# Returns the lockfile path under /mnt/root/tmp/resin-supervisor/<appId>/.
tmpLockPath = (app) ->
# Fall back to `app` itself when it has no (non-null) `appId` property.
appId = app.appId ? app
return "/mnt/root/tmp/resin-supervisor/#{appId}/resin-updates.lock"
killmePath = (app) -> killmePath = (app) ->
appId = app.appId ? app appId = app.appId ? app
return "/mnt/root#{config.dataPath}/#{appId}/resin-kill-me" return "/mnt/root#{config.dataPath}/#{appId}/resin-kill-me"
# At boot, all apps should be unlocked *before* start to prevent a deadlock # At boot, all apps should be unlocked *before* start to prevent a deadlock
application.unlockAndStart = unlockAndStart = (app) -> application.unlockAndStart = unlockAndStart = (app) ->
lockFile.unlockAsync(lockPath(app)) lockFile.unlockAsync(persistentLockPath(app))
.then -> .then ->
start(app) start(app)
@ -384,20 +392,35 @@ application.lockUpdates = lockUpdates = do ->
_lock = new Lock() _lock = new Lock()
_writeLock = Promise.promisify(_lock.async.writeLock) _writeLock = Promise.promisify(_lock.async.writeLock)
return (app, force) -> return (app, force) ->
lockName = lockPath(app) device.isResinOSv1()
_writeLock(lockName) .then (isV1) ->
.tap (release) -> persistentLockName = persistentLockPath(app)
if force != true tmpLockName = tmpLockPath(app)
lockFile.lockAsync(lockName) _writeLock(tmpLockName)
.catch ENOENT, _.noop .tap (release) ->
.catch (err) -> if isV1 and force != true
lockFile.lockAsync(persistentLockName)
.catch ENOENT, _.noop
.catch (err) ->
release()
throw new UpdatesLockedError("Updates are locked: #{err.message}")
.tap (release) ->
if force != true
lockFile.lockAsync(tmpLockName)
.catch ENOENT, _.noop
.catch (err) ->
Promise.try ->
lockFile.unlockAsync(persistentLockName) if isV1
.finally ->
release()
throw new UpdatesLockedError("Updates are locked: #{err.message}")
.disposer (release) ->
Promise.try ->
lockFile.unlockAsync(tmpLockName) if force != true
.then ->
lockFile.unlockAsync(persistentLockName) if isV1 and force != true
.finally ->
release() release()
throw new UpdatesLockedError("Updates are locked: #{err.message}")
.disposer (release) ->
Promise.try ->
lockFile.unlockAsync(lockName) if force != true
.finally ->
release()
joinErrorMessages = (failures) -> joinErrorMessages = (failures) ->
s = if failures.length > 1 then 's' else '' s = if failures.length > 1 then 's' else ''

View File

@ -265,6 +265,7 @@ exports.getDataPath = (identifier) ->
exports.defaultBinds = (dataPath, includeV1Binds) -> exports.defaultBinds = (dataPath, includeV1Binds) ->
binds = [ binds = [
exports.getDataPath(dataPath) + ':/data' exports.getDataPath(dataPath) + ':/data'
"/tmp/resin-supervisor/#{dataPath}:/tmp/resin"
'/lib/modules:/lib/modules' '/lib/modules:/lib/modules'
'/lib/firmware:/lib/firmware' '/lib/firmware:/lib/firmware'
'/run/dbus:/host/run/dbus' '/run/dbus:/host/run/dbus'