mirror of
https://github.com/corda/corda.git
synced 2025-04-07 19:34:41 +00:00
Add exponential backoff to P2P messaging retry (#2975)
This commit is contained in:
parent
5dc71fc350
commit
2ff3939e2e
@ -86,6 +86,13 @@ absolute path to the node's base directory.
|
||||
here must be externally accessible when running nodes across a cluster of machines. If the provided host is unreachable,
|
||||
the node will try to auto-discover its public one.
|
||||
|
||||
:p2pMessagingRetry: Only used for notarisation requests. When the response doesn't arrive in time, the message is
|
||||
resent to a different notary-replica round-robin in case of clustered notaries.
|
||||
|
||||
:messageRedeliveryDelay: The initial retry delay, e.g. `30 seconds`.
|
||||
:maxRetryCount: How many retries to attempt.
|
||||
:backoffBase: The base of the exponential backoff, :math:`t_{wait} = messageRedeliveryDelay * backoffBase^{retryCount}`.
|
||||
|
||||
:rpcAddress: The address of the RPC system on which RPC requests can be made to the node. If not provided then the node will run without RPC. This is now deprecated in favour of the ``rpcSettings`` block.
|
||||
|
||||
:rpcSettings: Options for the RPC server.
|
||||
|
@ -16,6 +16,7 @@ import java.net.Proxy
|
||||
import java.net.URL
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.Paths
|
||||
import java.time.Duration
|
||||
import java.time.Instant
|
||||
import java.time.LocalDate
|
||||
import java.time.temporal.Temporal
|
||||
@ -104,6 +105,7 @@ private fun Config.getSingleValue(path: String, type: KType): Any? {
|
||||
Double::class -> getDouble(path)
|
||||
Boolean::class -> getBoolean(path)
|
||||
LocalDate::class -> LocalDate.parse(getString(path))
|
||||
Duration::class -> getDuration(path)
|
||||
Instant::class -> Instant.parse(getString(path))
|
||||
NetworkHostAndPort::class -> NetworkHostAndPort.parse(getString(path))
|
||||
Path::class -> Paths.get(getString(path))
|
||||
|
@ -118,7 +118,8 @@ class P2PMessagingTest {
|
||||
}
|
||||
|
||||
private fun DriverDSL.startAlice(): InProcess {
|
||||
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("messageRedeliveryDelaySeconds" to 1))
|
||||
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("p2pMessagingRetry" to mapOf(
|
||||
"messageRedeliveryDelay" to 1.seconds, "backoffBase" to 1.0, "maxRetryCount" to 3)))
|
||||
.map { (it as InProcess) }
|
||||
.getOrThrow()
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ interface NodeConfiguration : NodeSSLConfiguration {
|
||||
val compatibilityZoneURL: URL?
|
||||
val certificateChainCheckPolicies: List<CertChainPolicyConfig>
|
||||
val verifierType: VerifierType
|
||||
val messageRedeliveryDelaySeconds: Int
|
||||
val p2pMessagingRetry: P2PMessagingRetryConfiguration
|
||||
val notary: NotaryConfig?
|
||||
val additionalNodeInfoPollingFrequencyMsec: Long
|
||||
val p2pAddress: NetworkHostAndPort
|
||||
@ -108,6 +108,18 @@ data class BFTSMaRtConfiguration(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Currently only used for notarisation requests.
|
||||
*
|
||||
* When the response doesn't arrive in time, the message is resent to a different notary-replica round-robin
|
||||
* in case of clustered notaries.
|
||||
*/
|
||||
data class P2PMessagingRetryConfiguration(
|
||||
val messageRedeliveryDelay: Duration,
|
||||
val maxRetryCount: Int,
|
||||
val backoffBase: Double
|
||||
)
|
||||
|
||||
fun Config.parseAsNodeConfiguration(): NodeConfiguration = parseAs<NodeConfigurationImpl>()
|
||||
|
||||
data class NodeConfigurationImpl(
|
||||
@ -123,9 +135,7 @@ data class NodeConfigurationImpl(
|
||||
override val rpcUsers: List<User>,
|
||||
override val security : SecurityConfiguration? = null,
|
||||
override val verifierType: VerifierType,
|
||||
// TODO typesafe config supports the notion of durations. Make use of that by mapping it to java.time.Duration.
|
||||
// Then rename this to messageRedeliveryDelay and make it of type Duration
|
||||
override val messageRedeliveryDelaySeconds: Int = 30,
|
||||
override val p2pMessagingRetry: P2PMessagingRetryConfiguration,
|
||||
override val p2pAddress: NetworkHostAndPort,
|
||||
private val rpcAddress: NetworkHostAndPort? = null,
|
||||
private val rpcSettings: NodeRpcSettings,
|
||||
@ -337,4 +347,4 @@ data class SecurityConfiguration(val authService: SecurityConfiguration.AuthServ
|
||||
id = AuthServiceId("NODE_CONFIG"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -103,7 +103,6 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
||||
) : SingletonSerializeAsToken(), MessagingService, AddressToArtemisQueueResolver, AutoCloseable {
|
||||
companion object {
|
||||
private val log = contextLogger()
|
||||
private const val messageMaxRetryCount: Int = 3
|
||||
|
||||
fun createMessageToRedeliver(): PersistentMap<Long, Pair<Message, MessageRecipients>, RetryMessage, Long> {
|
||||
return PersistentMap(
|
||||
@ -131,6 +130,9 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
||||
}
|
||||
}
|
||||
|
||||
private val messageMaxRetryCount: Int = config.p2pMessagingRetry.maxRetryCount
|
||||
private val backoffBase: Double = config.p2pMessagingRetry.backoffBase
|
||||
|
||||
private class InnerState {
|
||||
var started = false
|
||||
var running = false
|
||||
@ -156,7 +158,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
||||
data class HandlerRegistration(val topic: String, val callback: Any) : MessageHandlerRegistration
|
||||
|
||||
override val myAddress: SingleMessageRecipient = NodeAddress(myIdentity, advertisedAddress)
|
||||
private val messageRedeliveryDelaySeconds = config.messageRedeliveryDelaySeconds.toLong()
|
||||
private val messageRedeliveryDelaySeconds = config.p2pMessagingRetry.messageRedeliveryDelay.seconds
|
||||
private val state = ThreadBox(InnerState())
|
||||
private val knownQueues = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>())
|
||||
|
||||
@ -526,7 +528,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
||||
|
||||
scheduledMessageRedeliveries[retryId] = nodeExecutor.schedule({
|
||||
sendWithRetry(retryCount + 1, message, target, retryId)
|
||||
}, messageRedeliveryDelaySeconds, TimeUnit.SECONDS)
|
||||
},messageRedeliveryDelaySeconds * Math.pow(backoffBase, retryCount.toDouble()).toLong(), TimeUnit.SECONDS)
|
||||
}
|
||||
|
||||
override fun cancelRedelivery(retryId: Long) {
|
||||
|
@ -19,4 +19,9 @@ verifierType = InMemory
|
||||
rpcSettings = {
|
||||
useSsl = false
|
||||
standAloneBroker = false
|
||||
}
|
||||
}
|
||||
p2pMessagingRetry {
|
||||
messageRedeliveryDelay = 30 seconds
|
||||
maxRetryCount = 3
|
||||
backoffBase = 2.0
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package net.corda.node.services.config
|
||||
|
||||
import net.corda.core.internal.div
|
||||
import net.corda.core.utilities.NetworkHostAndPort
|
||||
import net.corda.core.utilities.seconds
|
||||
import net.corda.testing.core.ALICE_NAME
|
||||
import net.corda.testing.node.MockServices.Companion.makeTestDataSourceProperties
|
||||
import net.corda.tools.shell.SSHDConfiguration
|
||||
@ -73,6 +74,7 @@ class NodeConfigurationImplTest {
|
||||
verifierType = VerifierType.InMemory,
|
||||
p2pAddress = NetworkHostAndPort("localhost", 0),
|
||||
messagingServerAddress = null,
|
||||
p2pMessagingRetry = P2PMessagingRetryConfiguration(5.seconds, 3, 1.0),
|
||||
notary = null,
|
||||
certificateChainCheckPolicies = emptyList(),
|
||||
devMode = true,
|
||||
|
@ -4,9 +4,11 @@ import com.nhaarman.mockito_kotlin.doReturn
|
||||
import com.nhaarman.mockito_kotlin.whenever
|
||||
import net.corda.core.crypto.generateKeyPair
|
||||
import net.corda.core.utilities.NetworkHostAndPort
|
||||
import net.corda.core.utilities.seconds
|
||||
import net.corda.node.internal.configureDatabase
|
||||
import net.corda.node.services.config.CertChainPolicyConfig
|
||||
import net.corda.node.services.config.NodeConfiguration
|
||||
import net.corda.node.services.config.P2PMessagingRetryConfiguration
|
||||
import net.corda.node.services.config.configureWithDevSSLCertificate
|
||||
import net.corda.node.services.network.NetworkMapCacheImpl
|
||||
import net.corda.node.services.network.PersistentNetworkMapCache
|
||||
@ -69,7 +71,7 @@ class ArtemisMessagingTest {
|
||||
doReturn(NetworkHostAndPort("0.0.0.0", serverPort)).whenever(it).p2pAddress
|
||||
doReturn(null).whenever(it).jmxMonitoringHttpPort
|
||||
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
|
||||
doReturn(5).whenever(it).messageRedeliveryDelaySeconds
|
||||
doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase=1.0)).whenever(it).p2pMessagingRetry
|
||||
}
|
||||
LogHelper.setLevel(PersistentUniquenessProvider::class)
|
||||
database = configureDatabase(makeTestDataSourceProperties(), DatabaseConfig(), rigorousMock())
|
||||
|
@ -463,7 +463,7 @@ private fun mockNodeConfiguration(): NodeConfiguration {
|
||||
doReturn(null).whenever(it).compatibilityZoneURL
|
||||
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
|
||||
doReturn(VerifierType.InMemory).whenever(it).verifierType
|
||||
doReturn(5).whenever(it).messageRedeliveryDelaySeconds
|
||||
doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase = 1.0)).whenever(it).p2pMessagingRetry
|
||||
doReturn(5.seconds.toMillis()).whenever(it).additionalNodeInfoPollingFrequencyMsec
|
||||
doReturn(null).whenever(it).devModeOptions
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user