mirror of
https://github.com/balena-os/balena-supervisor.git
synced 2025-01-30 16:14:11 +00:00
Issue #20: Change the update lock to a temporary filesystem
The lock is now located at `/tmp/resin-supervisor/<appId>/` on the host, and `/tmp/resin/` on the user container. The old lock location is supported only in Resin OS 1.X (and both locks are taken in that case). This fixes the race condition when the app is started before the supervisor, and takes a lock that is cleared on supervisor startup. Change-Type: major Signed-off-by: Pablo Carranza Velez <pablo@resin.io>
This commit is contained in:
parent
208b799c4b
commit
27690e0c30
@ -2,13 +2,27 @@
|
|||||||
|
|
||||||
Locking updates means that the Supervisor will not be able to kill your application. This is meant to be used at critical sections of your code where you don't want to be interrupted, or to control that updates are only installed at certain times.
|
Locking updates means that the Supervisor will not be able to kill your application. This is meant to be used at critical sections of your code where you don't want to be interrupted, or to control that updates are only installed at certain times.
|
||||||
|
|
||||||
In order to do this, users can create a file called `/data/resin-updates.lock` that will prevent the Supervisor from killing and restarting the app. As any other lockfile, the Supervisor itself will create such file before killing the app, so you should only create it in exclusive mode. This means: only create the lockfile if it doesn't already exist. Several tools exist to simplify this process, for example [npm/lockfile](https://github.com/npm/lockfile).
|
In order to do this, users can create a file called `resin-updates.lock` that will prevent the Supervisor from killing and restarting the app. As with any other lockfile, the Supervisor itself will create such a file before killing the app, so you should only create it in exclusive mode. This means: only create the lockfile if it doesn't already exist. Several tools exist to simplify this process, for example [npm/lockfile](https://github.com/npm/lockfile).
|
||||||
|
|
||||||
|
### Location of the lockfile
|
||||||
|
|
||||||
|
In supervisor v4.0.0 and higher, the lock is located at `/tmp/resin/resin-updates.lock`. This lock is cleared automatically when the device reboots, so the user app must take it every time it starts up.
|
||||||
|
|
||||||
|
Older supervisors have the lock at `/data/resin-updates.lock`. This lock is still supported on devices running Resin OS 1.X. In this case, newer supervisors will try to take *both* locks before killing the application.
|
||||||
|
|
||||||
|
The old lock has the problem that the supervisor has to clear it whenever it starts up to avoid deadlocks. If the user app
|
||||||
|
has taken the lock before the supervisor starts up, the lock will be cleared and the app can operate under the false
|
||||||
|
assumption that updates are locked (see [issue #20](https://github.com/resin-io/resin-supervisor/issues/20)). We therefore strongly recommend switching to the new lock location as soon as possible.
|
||||||
|
|
||||||
|
In supervisors >= v4.0.0 and any OS that is not Resin OS 1.X, the old lock location is completely ignored.
|
||||||
|
|
||||||
|
### Taking the lock
|
||||||
|
|
||||||
Using the above-mentioned library, the lock can be acquired like in this CoffeeScript example:
|
Using the above-mentioned library, the lock can be acquired like in this CoffeeScript example:
|
||||||
```coffeescript
|
```coffeescript
|
||||||
lockFile = require 'lockfile'
|
lockFile = require 'lockfile'
|
||||||
|
|
||||||
lockFile.lock '/data/resin-updates.lock', (err) ->
|
lockFile.lock '/tmp/resin/resin-updates.lock', (err) ->
|
||||||
# A non-null err probably means the supervisor is about to kill us
|
# A non-null err probably means the supervisor is about to kill us
|
||||||
throw new Error('Could not acquire lock: ', err) if err?
|
throw new Error('Could not acquire lock: ', err) if err?
|
||||||
|
|
||||||
@ -16,7 +30,7 @@ Using the above-mentioned library, the lock can be acquired like in this CoffeeS
|
|||||||
doTheHarlemShake()
|
doTheHarlemShake()
|
||||||
|
|
||||||
# Now we release the lock, and we can be killed again
|
# Now we release the lock, and we can be killed again
|
||||||
lockFile.unlock '/data/resin-updates.lock', (err) ->
|
lockFile.unlock '/tmp/resin/resin-updates.lock', (err) ->
|
||||||
# If err is not null here, something went really wrong
|
# If err is not null here, something went really wrong
|
||||||
throw err if err?
|
throw err if err?
|
||||||
```
|
```
|
||||||
|
2
entry.sh
2
entry.sh
@ -6,6 +6,8 @@ set -o errexit
|
|||||||
mkdir -p /dev/net
|
mkdir -p /dev/net
|
||||||
[ -c /dev/net/tun ] ||
|
[ -c /dev/net/tun ] ||
|
||||||
mknod /dev/net/tun c 10 200
|
mknod /dev/net/tun c 10 200
|
||||||
|
[ -d /mnt/root/tmp/resin-supervisor ] ||
|
||||||
|
mkdir -p /mnt/root/tmp/resin-supervisor
|
||||||
|
|
||||||
mount -t tmpfs -o size=1m tmpfs /var/run/resin
|
mount -t tmpfs -o size=1m tmpfs /var/run/resin
|
||||||
|
|
||||||
|
@ -175,7 +175,11 @@ application.kill = kill = (app, { updateDB = true, removeContainer = true } = {}
|
|||||||
return
|
return
|
||||||
throw err
|
throw err
|
||||||
.tap ->
|
.tap ->
|
||||||
lockFile.unlockAsync(lockPath(app))
|
lockFile.unlockAsync(tmpLockPath(app))
|
||||||
|
.tap ->
|
||||||
|
device.isResinOSv1()
|
||||||
|
.then (isV1) ->
|
||||||
|
lockFile.unlockAsync(persistentLockPath(app)) if isV1
|
||||||
.tap ->
|
.tap ->
|
||||||
logSystemEvent(logTypes.stopAppSuccess, app)
|
logSystemEvent(logTypes.stopAppSuccess, app)
|
||||||
if removeContainer && updateDB
|
if removeContainer && updateDB
|
||||||
@ -364,17 +368,21 @@ createRestartPolicy = ({ name, maximumRetryCount }) ->
|
|||||||
policy.MaximumRetryCount = maximumRetryCount
|
policy.MaximumRetryCount = maximumRetryCount
|
||||||
return policy
|
return policy
|
||||||
|
|
||||||
lockPath = (app) ->
|
persistentLockPath = (app) ->
|
||||||
appId = app.appId ? app
|
appId = app.appId ? app
|
||||||
return "/mnt/root#{config.dataPath}/#{appId}/resin-updates.lock"
|
return "/mnt/root#{config.dataPath}/#{appId}/resin-updates.lock"
|
||||||
|
|
||||||
|
tmpLockPath = (app) ->
|
||||||
|
appId = app.appId ? app
|
||||||
|
return "/mnt/root/tmp/resin-supervisor/#{appId}/resin-updates.lock"
|
||||||
|
|
||||||
killmePath = (app) ->
|
killmePath = (app) ->
|
||||||
appId = app.appId ? app
|
appId = app.appId ? app
|
||||||
return "/mnt/root#{config.dataPath}/#{appId}/resin-kill-me"
|
return "/mnt/root#{config.dataPath}/#{appId}/resin-kill-me"
|
||||||
|
|
||||||
# At boot, all apps should be unlocked *before* start to prevent a deadlock
|
# At boot, all apps should be unlocked *before* start to prevent a deadlock
|
||||||
application.unlockAndStart = unlockAndStart = (app) ->
|
application.unlockAndStart = unlockAndStart = (app) ->
|
||||||
lockFile.unlockAsync(lockPath(app))
|
lockFile.unlockAsync(persistentLockPath(app))
|
||||||
.then ->
|
.then ->
|
||||||
start(app)
|
start(app)
|
||||||
|
|
||||||
@ -384,20 +392,35 @@ application.lockUpdates = lockUpdates = do ->
|
|||||||
_lock = new Lock()
|
_lock = new Lock()
|
||||||
_writeLock = Promise.promisify(_lock.async.writeLock)
|
_writeLock = Promise.promisify(_lock.async.writeLock)
|
||||||
return (app, force) ->
|
return (app, force) ->
|
||||||
lockName = lockPath(app)
|
device.isResinOSv1()
|
||||||
_writeLock(lockName)
|
.then (isV1) ->
|
||||||
.tap (release) ->
|
persistentLockName = persistentLockPath(app)
|
||||||
if force != true
|
tmpLockName = tmpLockPath(app)
|
||||||
lockFile.lockAsync(lockName)
|
_writeLock(tmpLockName)
|
||||||
.catch ENOENT, _.noop
|
.tap (release) ->
|
||||||
.catch (err) ->
|
if isV1 and force != true
|
||||||
|
lockFile.lockAsync(persistentLockName)
|
||||||
|
.catch ENOENT, _.noop
|
||||||
|
.catch (err) ->
|
||||||
|
release()
|
||||||
|
throw new UpdatesLockedError("Updates are locked: #{err.message}")
|
||||||
|
.tap (release) ->
|
||||||
|
if force != true
|
||||||
|
lockFile.lockAsync(tmpLockName)
|
||||||
|
.catch ENOENT, _.noop
|
||||||
|
.catch (err) ->
|
||||||
|
Promise.try ->
|
||||||
|
lockFile.unlockAsync(persistentLockName) if isV1
|
||||||
|
.finally ->
|
||||||
|
release()
|
||||||
|
throw new UpdatesLockedError("Updates are locked: #{err.message}")
|
||||||
|
.disposer (release) ->
|
||||||
|
Promise.try ->
|
||||||
|
lockFile.unlockAsync(tmpLockName) if force != true
|
||||||
|
.then ->
|
||||||
|
lockFile.unlockAsync(persistentLockName) if isV1 and force != true
|
||||||
|
.finally ->
|
||||||
release()
|
release()
|
||||||
throw new UpdatesLockedError("Updates are locked: #{err.message}")
|
|
||||||
.disposer (release) ->
|
|
||||||
Promise.try ->
|
|
||||||
lockFile.unlockAsync(lockName) if force != true
|
|
||||||
.finally ->
|
|
||||||
release()
|
|
||||||
|
|
||||||
joinErrorMessages = (failures) ->
|
joinErrorMessages = (failures) ->
|
||||||
s = if failures.length > 1 then 's' else ''
|
s = if failures.length > 1 then 's' else ''
|
||||||
|
@ -265,6 +265,7 @@ exports.getDataPath = (identifier) ->
|
|||||||
exports.defaultBinds = (dataPath, includeV1Binds) ->
|
exports.defaultBinds = (dataPath, includeV1Binds) ->
|
||||||
binds = [
|
binds = [
|
||||||
exports.getDataPath(dataPath) + ':/data'
|
exports.getDataPath(dataPath) + ':/data'
|
||||||
|
"/tmp/resin-supervisor/#{dataPath}:/tmp/resin"
|
||||||
'/lib/modules:/lib/modules'
|
'/lib/modules:/lib/modules'
|
||||||
'/lib/firmware:/lib/firmware'
|
'/lib/firmware:/lib/firmware'
|
||||||
'/run/dbus:/host/run/dbus'
|
'/run/dbus:/host/run/dbus'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user