Add exponential backoff to P2P messaging retry (#2975)

This commit is contained in:
Thomas Schroeter 2018-04-26 13:58:41 +01:00 committed by GitHub
parent 5dc71fc350
commit 2ff3939e2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 43 additions and 12 deletions

View File

@ -86,6 +86,13 @@ absolute path to the node's base directory.
here must be externally accessible when running nodes across a cluster of machines. If the provided host is unreachable, here must be externally accessible when running nodes across a cluster of machines. If the provided host is unreachable,
the node will try to auto-discover its public one. the node will try to auto-discover its public one.
:p2pMessagingRetry: Only used for notarisation requests. When the response doesn't arrive in time, the message is
resent to a different notary-replica round-robin in case of clustered notaries.
:messageRedeliveryDelay: The initial retry delay, e.g. `30 seconds`.
:maxRetryCount: How many retries to attempt.
:backoffBase: The base of the exponential backoff, :math:`t_{wait} = messageRedeliveryDelay * backoffBase^{retryCount}`.
:rpcAddress: The address of the RPC system on which RPC requests can be made to the node. If not provided then the node will run without RPC. This is now deprecated in favour of the ``rpcSettings`` block. :rpcAddress: The address of the RPC system on which RPC requests can be made to the node. If not provided then the node will run without RPC. This is now deprecated in favour of the ``rpcSettings`` block.
:rpcSettings: Options for the RPC server. :rpcSettings: Options for the RPC server.

View File

@ -16,6 +16,7 @@ import java.net.Proxy
import java.net.URL import java.net.URL
import java.nio.file.Path import java.nio.file.Path
import java.nio.file.Paths import java.nio.file.Paths
import java.time.Duration
import java.time.Instant import java.time.Instant
import java.time.LocalDate import java.time.LocalDate
import java.time.temporal.Temporal import java.time.temporal.Temporal
@ -104,6 +105,7 @@ private fun Config.getSingleValue(path: String, type: KType): Any? {
Double::class -> getDouble(path) Double::class -> getDouble(path)
Boolean::class -> getBoolean(path) Boolean::class -> getBoolean(path)
LocalDate::class -> LocalDate.parse(getString(path)) LocalDate::class -> LocalDate.parse(getString(path))
Duration::class -> getDuration(path)
Instant::class -> Instant.parse(getString(path)) Instant::class -> Instant.parse(getString(path))
NetworkHostAndPort::class -> NetworkHostAndPort.parse(getString(path)) NetworkHostAndPort::class -> NetworkHostAndPort.parse(getString(path))
Path::class -> Paths.get(getString(path)) Path::class -> Paths.get(getString(path))

View File

@ -118,7 +118,8 @@ class P2PMessagingTest {
} }
private fun DriverDSL.startAlice(): InProcess { private fun DriverDSL.startAlice(): InProcess {
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("messageRedeliveryDelaySeconds" to 1)) return startNode(providedName = ALICE_NAME, customOverrides = mapOf("p2pMessagingRetry" to mapOf(
"messageRedeliveryDelay" to 1.seconds, "backoffBase" to 1.0, "maxRetryCount" to 3)))
.map { (it as InProcess) } .map { (it as InProcess) }
.getOrThrow() .getOrThrow()
} }

View File

@ -35,7 +35,7 @@ interface NodeConfiguration : NodeSSLConfiguration {
val compatibilityZoneURL: URL? val compatibilityZoneURL: URL?
val certificateChainCheckPolicies: List<CertChainPolicyConfig> val certificateChainCheckPolicies: List<CertChainPolicyConfig>
val verifierType: VerifierType val verifierType: VerifierType
val messageRedeliveryDelaySeconds: Int val p2pMessagingRetry: P2PMessagingRetryConfiguration
val notary: NotaryConfig? val notary: NotaryConfig?
val additionalNodeInfoPollingFrequencyMsec: Long val additionalNodeInfoPollingFrequencyMsec: Long
val p2pAddress: NetworkHostAndPort val p2pAddress: NetworkHostAndPort
@ -108,6 +108,18 @@ data class BFTSMaRtConfiguration(
} }
} }
/**
* Currently only used for notarisation requests.
*
* When the response doesn't arrive in time, the message is resent to a different notary-replica round-robin
* in case of clustered notaries.
*/
data class P2PMessagingRetryConfiguration(
val messageRedeliveryDelay: Duration,
val maxRetryCount: Int,
val backoffBase: Double
)
fun Config.parseAsNodeConfiguration(): NodeConfiguration = parseAs<NodeConfigurationImpl>() fun Config.parseAsNodeConfiguration(): NodeConfiguration = parseAs<NodeConfigurationImpl>()
data class NodeConfigurationImpl( data class NodeConfigurationImpl(
@ -123,9 +135,7 @@ data class NodeConfigurationImpl(
override val rpcUsers: List<User>, override val rpcUsers: List<User>,
override val security : SecurityConfiguration? = null, override val security : SecurityConfiguration? = null,
override val verifierType: VerifierType, override val verifierType: VerifierType,
// TODO typesafe config supports the notion of durations. Make use of that by mapping it to java.time.Duration. override val p2pMessagingRetry: P2PMessagingRetryConfiguration,
// Then rename this to messageRedeliveryDelay and make it of type Duration
override val messageRedeliveryDelaySeconds: Int = 30,
override val p2pAddress: NetworkHostAndPort, override val p2pAddress: NetworkHostAndPort,
private val rpcAddress: NetworkHostAndPort? = null, private val rpcAddress: NetworkHostAndPort? = null,
private val rpcSettings: NodeRpcSettings, private val rpcSettings: NodeRpcSettings,
@ -337,4 +347,4 @@ data class SecurityConfiguration(val authService: SecurityConfiguration.AuthServ
id = AuthServiceId("NODE_CONFIG")) id = AuthServiceId("NODE_CONFIG"))
} }
} }
} }

View File

@ -103,7 +103,6 @@ class P2PMessagingClient(val config: NodeConfiguration,
) : SingletonSerializeAsToken(), MessagingService, AddressToArtemisQueueResolver, AutoCloseable { ) : SingletonSerializeAsToken(), MessagingService, AddressToArtemisQueueResolver, AutoCloseable {
companion object { companion object {
private val log = contextLogger() private val log = contextLogger()
private const val messageMaxRetryCount: Int = 3
fun createMessageToRedeliver(): PersistentMap<Long, Pair<Message, MessageRecipients>, RetryMessage, Long> { fun createMessageToRedeliver(): PersistentMap<Long, Pair<Message, MessageRecipients>, RetryMessage, Long> {
return PersistentMap( return PersistentMap(
@ -131,6 +130,9 @@ class P2PMessagingClient(val config: NodeConfiguration,
} }
} }
private val messageMaxRetryCount: Int = config.p2pMessagingRetry.maxRetryCount
private val backoffBase: Double = config.p2pMessagingRetry.backoffBase
private class InnerState { private class InnerState {
var started = false var started = false
var running = false var running = false
@ -156,7 +158,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
data class HandlerRegistration(val topic: String, val callback: Any) : MessageHandlerRegistration data class HandlerRegistration(val topic: String, val callback: Any) : MessageHandlerRegistration
override val myAddress: SingleMessageRecipient = NodeAddress(myIdentity, advertisedAddress) override val myAddress: SingleMessageRecipient = NodeAddress(myIdentity, advertisedAddress)
private val messageRedeliveryDelaySeconds = config.messageRedeliveryDelaySeconds.toLong() private val messageRedeliveryDelaySeconds = config.p2pMessagingRetry.messageRedeliveryDelay.seconds
private val state = ThreadBox(InnerState()) private val state = ThreadBox(InnerState())
private val knownQueues = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>()) private val knownQueues = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>())
@ -526,7 +528,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
scheduledMessageRedeliveries[retryId] = nodeExecutor.schedule({ scheduledMessageRedeliveries[retryId] = nodeExecutor.schedule({
sendWithRetry(retryCount + 1, message, target, retryId) sendWithRetry(retryCount + 1, message, target, retryId)
}, messageRedeliveryDelaySeconds, TimeUnit.SECONDS) },messageRedeliveryDelaySeconds * Math.pow(backoffBase, retryCount.toDouble()).toLong(), TimeUnit.SECONDS)
} }
override fun cancelRedelivery(retryId: Long) { override fun cancelRedelivery(retryId: Long) {

View File

@ -19,4 +19,9 @@ verifierType = InMemory
rpcSettings = { rpcSettings = {
useSsl = false useSsl = false
standAloneBroker = false standAloneBroker = false
} }
p2pMessagingRetry {
messageRedeliveryDelay = 30 seconds
maxRetryCount = 3
backoffBase = 2.0
}

View File

@ -2,6 +2,7 @@ package net.corda.node.services.config
import net.corda.core.internal.div import net.corda.core.internal.div
import net.corda.core.utilities.NetworkHostAndPort import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.seconds
import net.corda.testing.core.ALICE_NAME import net.corda.testing.core.ALICE_NAME
import net.corda.testing.node.MockServices.Companion.makeTestDataSourceProperties import net.corda.testing.node.MockServices.Companion.makeTestDataSourceProperties
import net.corda.tools.shell.SSHDConfiguration import net.corda.tools.shell.SSHDConfiguration
@ -73,6 +74,7 @@ class NodeConfigurationImplTest {
verifierType = VerifierType.InMemory, verifierType = VerifierType.InMemory,
p2pAddress = NetworkHostAndPort("localhost", 0), p2pAddress = NetworkHostAndPort("localhost", 0),
messagingServerAddress = null, messagingServerAddress = null,
p2pMessagingRetry = P2PMessagingRetryConfiguration(5.seconds, 3, 1.0),
notary = null, notary = null,
certificateChainCheckPolicies = emptyList(), certificateChainCheckPolicies = emptyList(),
devMode = true, devMode = true,

View File

@ -4,9 +4,11 @@ import com.nhaarman.mockito_kotlin.doReturn
import com.nhaarman.mockito_kotlin.whenever import com.nhaarman.mockito_kotlin.whenever
import net.corda.core.crypto.generateKeyPair import net.corda.core.crypto.generateKeyPair
import net.corda.core.utilities.NetworkHostAndPort import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.seconds
import net.corda.node.internal.configureDatabase import net.corda.node.internal.configureDatabase
import net.corda.node.services.config.CertChainPolicyConfig import net.corda.node.services.config.CertChainPolicyConfig
import net.corda.node.services.config.NodeConfiguration import net.corda.node.services.config.NodeConfiguration
import net.corda.node.services.config.P2PMessagingRetryConfiguration
import net.corda.node.services.config.configureWithDevSSLCertificate import net.corda.node.services.config.configureWithDevSSLCertificate
import net.corda.node.services.network.NetworkMapCacheImpl import net.corda.node.services.network.NetworkMapCacheImpl
import net.corda.node.services.network.PersistentNetworkMapCache import net.corda.node.services.network.PersistentNetworkMapCache
@ -69,7 +71,7 @@ class ArtemisMessagingTest {
doReturn(NetworkHostAndPort("0.0.0.0", serverPort)).whenever(it).p2pAddress doReturn(NetworkHostAndPort("0.0.0.0", serverPort)).whenever(it).p2pAddress
doReturn(null).whenever(it).jmxMonitoringHttpPort doReturn(null).whenever(it).jmxMonitoringHttpPort
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
doReturn(5).whenever(it).messageRedeliveryDelaySeconds doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase=1.0)).whenever(it).p2pMessagingRetry
} }
LogHelper.setLevel(PersistentUniquenessProvider::class) LogHelper.setLevel(PersistentUniquenessProvider::class)
database = configureDatabase(makeTestDataSourceProperties(), DatabaseConfig(), rigorousMock()) database = configureDatabase(makeTestDataSourceProperties(), DatabaseConfig(), rigorousMock())

View File

@ -463,7 +463,7 @@ private fun mockNodeConfiguration(): NodeConfiguration {
doReturn(null).whenever(it).compatibilityZoneURL doReturn(null).whenever(it).compatibilityZoneURL
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
doReturn(VerifierType.InMemory).whenever(it).verifierType doReturn(VerifierType.InMemory).whenever(it).verifierType
doReturn(5).whenever(it).messageRedeliveryDelaySeconds doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase = 1.0)).whenever(it).p2pMessagingRetry
doReturn(5.seconds.toMillis()).whenever(it).additionalNodeInfoPollingFrequencyMsec doReturn(5.seconds.toMillis()).whenever(it).additionalNodeInfoPollingFrequencyMsec
doReturn(null).whenever(it).devModeOptions doReturn(null).whenever(it).devModeOptions
} }