Add jitter to flow retries (#1037)

* Add jitter to flow retries

See https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/

* Add two-second timeout to `timed flow is retried` test

* Minimum jittered delay is one second

* Adjust default retry config

Make sure the last retry fires after more than 20 minutes, which
is twice the default network map update interval. This way the notary
should have the latest network map when the last retry kicks in.
This commit is contained in:
Thomas Schroeter 2018-07-31 17:24:03 +01:00 committed by GitHub
parent 0e807a2c3c
commit fd06adddad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 4 additions and 3 deletions

View File

@ -182,4 +182,4 @@ class TimedFlowMultiThreadedSMMTests : IntegrationTest() {
}
}
}
}
}

View File

@ -602,10 +602,11 @@ class MultiThreadedStateMachineManager(
/**
 * Schedules a task on [timeoutScheduler] that, once the retry delay has elapsed, delivers an
 * [Event.Error] wrapping a [FlowTimeoutException] to the fiber of [flow].
 *
 * The delay is derived from the `flowTimeout` configuration: the base `timeout` is scaled by
 * `backoffBase` raised to [retryCount], then jittered so the task fires somewhere between roughly
 * half of that delay and the full delay, and never sooner than one second.
 *
 * @param flow the flow whose fiber receives the timeout error event.
 * @param retryCount exponent applied to `backoffBase` when computing the delay.
 * @return the future returned by `timeoutScheduler.schedule` for the scheduled task.
 */
private fun scheduleTimeoutException(flow: Flow, retryCount: Int): ScheduledFuture<*> {
return with(serviceHub.configuration.flowTimeout) {
// NOTE(review): `.toLong()` binds to the `Math.pow(...)` result, not to the whole product, so the
// fractional part of the backoff multiplier is dropped before multiplying (e.g. 1.8 becomes 1).
// Confirm whether truncating the full product was intended instead.
val timeoutDelaySeconds = timeout.seconds * Math.pow(backoffBase, retryCount.toDouble()).toLong()
// Jitter: keep half of the delay fixed and add a random share of the other half; clamp to at least 1s.
// Presumably `Random` is java.util.Random, whose nextDouble() is in [0.0, 1.0) — verify the import.
val jitteredDelaySeconds = maxOf(1L ,timeoutDelaySeconds/2 + (Random().nextDouble() * timeoutDelaySeconds/2).toLong())
timeoutScheduler.schedule({
// The exception carries the configured maximum restart count.
val event = Event.Error(FlowTimeoutException(maxRestartCount))
flow.fiber.scheduleEvent(event)
// NOTE(review): the next two lines look like the removed and the added version of the same
// closing argument list from the diff; only the jittered variant belongs in the resulting code.
}, timeoutDelaySeconds, TimeUnit.SECONDS)
}, jitteredDelaySeconds, TimeUnit.SECONDS)
}
}

View File

@ -46,6 +46,6 @@ rpcSettings = {
}
# Timeout and retry settings for flows.
flowTimeout {
# Base timeout value.
timeout = 30 seconds
# NOTE(review): the two maxRestartCount lines appear to be the old and the new value from the diff
# (5 -> 6); if both were kept, HOCON would let the later assignment win.
maxRestartCount = 5
maxRestartCount = 6
# Presumably the base of the exponential backoff applied per retry — verify against the scheduling code.
backoffBase = 1.8
}