diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e75cd3f..684954ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file automatically by Versionist. DO NOT EDIT THIS FILE MANUALLY! This project adheres to [Semantic Versioning](http://semver.org/). +## v6.2.0 - 2017-08-16 + +* Try to resume the download of a delta if it fails due to flaky network #483 [Akis Kesoglou] + ## v6.1.4 - 2017-08-07 * Fix references in deploy-to-resin.js and use github credentials when pushing in pr-to-meta-resin.sh #481 [Pablo Carranza Velez] diff --git a/package.json b/package.json index 41dc3836..b12c3246 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "resin-supervisor", "description": "This is resin.io's Supervisor, a program that runs on IoT devices and has the task of running user Apps (which are Docker containers), and updating them as Resin's API informs it to.", - "version": "6.1.4", + "version": "6.2.0", "license": "Apache-2.0", "repository": { "type": "git", @@ -29,7 +29,7 @@ "buffer-equal-constant-time": "^1.0.1", "coffee-loader": "^0.7.3", "coffee-script": "~1.11.0", - "docker-delta": "1.1.1", + "docker-delta": "^2.0.1", "docker-progress": "^2.6.0", "docker-toolbelt": "^3.0.1", "event-stream": "^3.0.20", @@ -46,7 +46,7 @@ "pinejs-client": "^2.4.0", "pubnub": "^3.7.13", "request": "^2.51.0", - "request-progress": "^2.0.1", + "resumable-request": "^1.0.0", "resin-lint": "^1.3.1", "resin-register-device": "^3.0.0", "rimraf": "^2.5.4", diff --git a/src/application.coffee b/src/application.coffee index 87fb5e96..de652be9 100644 --- a/src/application.coffee +++ b/src/application.coffee @@ -19,6 +19,8 @@ proxyvisor = require './proxyvisor' osRelease = require './lib/os-release' deviceConfig = require './device-config' +DEFAULT_DELTA_APPLY_TIMEOUT = 300 * 1000 # 5 minutes + class UpdatesLockedError extends TypedError ImageNotFoundError = (err) -> return "#{err.statusCode}" is '404' @@ 
-218,9 +220,15 @@ fetch = (app, setDeviceUpdateState = true) -> Promise.join utils.getConfig('apiKey'), utils.getConfig('uuid'), (apiKey, uuid) -> if conf['RESIN_SUPERVISOR_DELTA'] == '1' logSystemEvent(logTypes.downloadAppDelta, app) - requestTimeout = checkInt(conf['RESIN_SUPERVISOR_DELTA_REQUEST_TIMEOUT'], positive: true) ? 30 * 60 * 1000 - totalTimeout = checkInt(conf['RESIN_SUPERVISOR_DELTA_TOTAL_TIMEOUT'], positive: true) ? 24 * 60 * 60 * 1000 - dockerUtils.rsyncImageWithProgress(app.imageId, { requestTimeout, totalTimeout, uuid, apiKey }, onProgress) + deltaOpts = { + uuid, apiKey + # use user-defined timeouts, but fallback to defaults if none is provided. + requestTimeout: checkInt(conf['RESIN_SUPERVISOR_DELTA_REQUEST_TIMEOUT'], positive: true) + applyTimeout: checkInt(conf['RESIN_SUPERVISOR_DELTA_APPLY_TIMEOUT'], positive: true) ? DEFAULT_DELTA_APPLY_TIMEOUT + retryCount: checkInt(conf['RESIN_SUPERVISOR_DELTA_RETRY_COUNT'], positive: true) + retryInterval: checkInt(conf['RESIN_SUPERVISOR_DELTA_RETRY_INTERVAL'], positive: true) + } + dockerUtils.rsyncImageWithProgress(app.imageId, deltaOpts, onProgress) else logSystemEvent(logTypes.downloadApp, app) dockerUtils.fetchImageWithProgress(app.imageId, onProgress, { uuid, apiKey }) diff --git a/src/docker-utils.coffee b/src/docker-utils.coffee index 938c3a91..e7df43ad 100644 --- a/src/docker-utils.coffee +++ b/src/docker-utils.coffee @@ -4,12 +4,11 @@ process.env.DOCKER_HOST ?= "unix://#{config.dockerSocket}" Docker = require 'docker-toolbelt' { DockerProgress } = require 'docker-progress' Promise = require 'bluebird' -progress = require 'request-progress' dockerDelta = require 'docker-delta' _ = require 'lodash' knex = require './db' -{ request } = require './request' +{ request, resumable } = require './request' Lock = require 'rwlock' utils = require './utils' rimraf = Promise.promisify(require('rimraf')) @@ -53,6 +52,23 @@ getRepoAndTag = (image) -> registry = '' return { repo: "#{registry}#{imageName}", tag: 
tagName } +applyDelta = (imgSrc, deltaUrl, { requestTimeout, applyTimeout, retryCount, retryInterval }, onProgress) -> + new Promise (resolve, reject) -> + resumable(request, { url: deltaUrl, timeout: requestTimeout }) + .on('progress', onProgress) + .on('retry', onProgress) + .on('error', reject) + .on 'response', (res) -> + if res.statusCode isnt 200 + reject(new Error("Got #{res.statusCode} when requesting delta from storage.")) + else if parseInt(res.headers['content-length']) is 0 + reject(new Error('Invalid delta URL.')) + else + deltaStream = dockerDelta.applyDelta(imgSrc, timeout: applyTimeout) + res.pipe(deltaStream) + .on('id', resolve) + .on('error', reject) + do -> _lock = new Lock() _writeLock = Promise.promisify(_lock.async.writeLock) @@ -66,7 +82,7 @@ do -> .disposer (release) -> release() - exports.rsyncImageWithProgress = (imgDest, { requestTimeout, totalTimeout, uuid, apiKey, startFromEmpty = false }, onProgress) -> + exports.rsyncImageWithProgress = (imgDest, { requestTimeout, applyTimeout, retryCount, retryInterval, uuid, apiKey, startFromEmpty = false }, onProgress) -> Promise.using readLockImages(), -> Promise.try -> if startFromEmpty @@ -87,36 +103,30 @@ do -> .get(1) .then (b) -> opts = + followRedirect: false timeout: requestTimeout if b?.token? 
- deltaAuthOpts = - auth: - bearer: b?.token - sendImmediately: true - opts = _.merge(opts, deltaAuthOpts) + opts.auth = + bearer: b.token + sendImmediately: true new Promise (resolve, reject) -> - progress request.get("#{config.deltaHost}/api/v2/delta?src=#{imgSrc}&dest=#{imgDest}", opts) - .on 'progress', (progress) -> - # In request-progress ^2.0.1, "percentage" is a ratio from 0 to 1 - onProgress(percentage: progress.percentage * 100) - .on 'end', -> - onProgress(percentage: 100) + request.get("#{config.deltaHost}/api/v2/delta?src=#{imgSrc}&dest=#{imgDest}", opts) .on 'response', (res) -> - if res.statusCode is 504 + res.resume() # discard response body -- we only care about response headers + if res.statusCode in [ 502, 504 ] reject(new Error('Delta server is still processing the delta, will retry')) - else if res.statusCode isnt 200 + else if not (300 <= res.statusCode < 400 and res.headers['location']?) reject(new Error("Got #{res.statusCode} when requesting image from delta server.")) else + deltaUrl = res.headers['location'] if imgSrc is 'resin/scratch' deltaSrc = null else deltaSrc = imgSrc - res.pipe(dockerDelta.applyDelta(deltaSrc, imgDest)) - .on('id', resolve) - .on('error', reject) + deltaOpts = { requestTimeout, applyTimeout, retryCount, retryInterval } + resolve(applyDelta(deltaSrc, deltaUrl, deltaOpts, onProgress)) .on 'error', reject - .timeout(totalTimeout) .then (id) -> getRepoAndTag(imgDest) .then ({ repo, tag }) -> diff --git a/src/request.coffee b/src/request.coffee index 496d7ab4..dba8ab44 100644 --- a/src/request.coffee +++ b/src/request.coffee @@ -2,6 +2,7 @@ config = require './config' PlatformAPI = require 'pinejs-client' Promise = require 'bluebird' request = require 'request' +resumable = require 'resumable-request' url = require 'url' osRelease = require './lib/os-release' @@ -16,12 +17,23 @@ if osVersion? 
else userAgent += " (Linux; #{osVersion})" +# With these settings, the device must be unable to receive a single byte +# from the network for a continuous period of 20 minutes before we give up. +# (reqTimeout + retryInterval) * retryCount / 1000ms / 60sec ~> minutes +DEFAULT_REQUEST_TIMEOUT = 30000 # ms +DEFAULT_REQUEST_RETRY_INTERVAL = 10000 # ms +DEFAULT_REQUEST_RETRY_COUNT = 30 + requestOpts = gzip: true - timeout: 30000 + timeout: DEFAULT_REQUEST_TIMEOUT headers: 'User-Agent': userAgent +resumableOpts = + maxRetries: DEFAULT_REQUEST_RETRY_COUNT + retryInterval: DEFAULT_REQUEST_RETRY_INTERVAL + try PLATFORM_ENDPOINT = url.resolve(config.apiEndpoint, '/v2/') exports.resinApi = resinApi = new PlatformAPI @@ -35,3 +47,5 @@ catch request = request.defaults(requestOpts) exports.request = Promise.promisifyAll(request, multiArgs: true) + +exports.resumable = resumable.defaults(resumableOpts)