mirror of
https://github.com/corda/corda.git
synced 2025-05-29 05:34:22 +00:00
Add exponential backoff to P2P messaging retry (#2975)
This commit is contained in:
parent
5dc71fc350
commit
2ff3939e2e
@ -86,6 +86,13 @@ absolute path to the node's base directory.
|
|||||||
here must be externally accessible when running nodes across a cluster of machines. If the provided host is unreachable,
|
here must be externally accessible when running nodes across a cluster of machines. If the provided host is unreachable,
|
||||||
the node will try to auto-discover its public one.
|
the node will try to auto-discover its public one.
|
||||||
|
|
||||||
|
:p2pMessagingRetry: Only used for notarisation requests. When the response doesn't arrive in time, the message is
|
||||||
|
resent to a different notary-replica round-robin in case of clustered notaries.
|
||||||
|
|
||||||
|
:messageRedeliveryDelay: The initial retry delay, e.g. `30 seconds`.
|
||||||
|
:maxRetryCount: How many retries to attempt.
|
||||||
|
:backoffBase: The base of the exponential backoff, :math:`t_{wait} = messageRedeliveryDelay * backoffBase^{retryCount}`.
|
||||||
|
|
||||||
:rpcAddress: The address of the RPC system on which RPC requests can be made to the node. If not provided then the node will run without RPC. This is now deprecated in favour of the ``rpcSettings`` block.
|
:rpcAddress: The address of the RPC system on which RPC requests can be made to the node. If not provided then the node will run without RPC. This is now deprecated in favour of the ``rpcSettings`` block.
|
||||||
|
|
||||||
:rpcSettings: Options for the RPC server.
|
:rpcSettings: Options for the RPC server.
|
||||||
|
@ -16,6 +16,7 @@ import java.net.Proxy
|
|||||||
import java.net.URL
|
import java.net.URL
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import java.nio.file.Paths
|
import java.nio.file.Paths
|
||||||
|
import java.time.Duration
|
||||||
import java.time.Instant
|
import java.time.Instant
|
||||||
import java.time.LocalDate
|
import java.time.LocalDate
|
||||||
import java.time.temporal.Temporal
|
import java.time.temporal.Temporal
|
||||||
@ -104,6 +105,7 @@ private fun Config.getSingleValue(path: String, type: KType): Any? {
|
|||||||
Double::class -> getDouble(path)
|
Double::class -> getDouble(path)
|
||||||
Boolean::class -> getBoolean(path)
|
Boolean::class -> getBoolean(path)
|
||||||
LocalDate::class -> LocalDate.parse(getString(path))
|
LocalDate::class -> LocalDate.parse(getString(path))
|
||||||
|
Duration::class -> getDuration(path)
|
||||||
Instant::class -> Instant.parse(getString(path))
|
Instant::class -> Instant.parse(getString(path))
|
||||||
NetworkHostAndPort::class -> NetworkHostAndPort.parse(getString(path))
|
NetworkHostAndPort::class -> NetworkHostAndPort.parse(getString(path))
|
||||||
Path::class -> Paths.get(getString(path))
|
Path::class -> Paths.get(getString(path))
|
||||||
|
@ -118,7 +118,8 @@ class P2PMessagingTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun DriverDSL.startAlice(): InProcess {
|
private fun DriverDSL.startAlice(): InProcess {
|
||||||
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("messageRedeliveryDelaySeconds" to 1))
|
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("p2pMessagingRetry" to mapOf(
|
||||||
|
"messageRedeliveryDelay" to 1.seconds, "backoffBase" to 1.0, "maxRetryCount" to 3)))
|
||||||
.map { (it as InProcess) }
|
.map { (it as InProcess) }
|
||||||
.getOrThrow()
|
.getOrThrow()
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@ interface NodeConfiguration : NodeSSLConfiguration {
|
|||||||
val compatibilityZoneURL: URL?
|
val compatibilityZoneURL: URL?
|
||||||
val certificateChainCheckPolicies: List<CertChainPolicyConfig>
|
val certificateChainCheckPolicies: List<CertChainPolicyConfig>
|
||||||
val verifierType: VerifierType
|
val verifierType: VerifierType
|
||||||
val messageRedeliveryDelaySeconds: Int
|
val p2pMessagingRetry: P2PMessagingRetryConfiguration
|
||||||
val notary: NotaryConfig?
|
val notary: NotaryConfig?
|
||||||
val additionalNodeInfoPollingFrequencyMsec: Long
|
val additionalNodeInfoPollingFrequencyMsec: Long
|
||||||
val p2pAddress: NetworkHostAndPort
|
val p2pAddress: NetworkHostAndPort
|
||||||
@ -108,6 +108,18 @@ data class BFTSMaRtConfiguration(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Currently only used for notarisation requests.
|
||||||
|
*
|
||||||
|
* When the response doesn't arrive in time, the message is resent to a different notary-replica round-robin
|
||||||
|
* in case of clustered notaries.
|
||||||
|
*/
|
||||||
|
data class P2PMessagingRetryConfiguration(
|
||||||
|
val messageRedeliveryDelay: Duration,
|
||||||
|
val maxRetryCount: Int,
|
||||||
|
val backoffBase: Double
|
||||||
|
)
|
||||||
|
|
||||||
fun Config.parseAsNodeConfiguration(): NodeConfiguration = parseAs<NodeConfigurationImpl>()
|
fun Config.parseAsNodeConfiguration(): NodeConfiguration = parseAs<NodeConfigurationImpl>()
|
||||||
|
|
||||||
data class NodeConfigurationImpl(
|
data class NodeConfigurationImpl(
|
||||||
@ -123,9 +135,7 @@ data class NodeConfigurationImpl(
|
|||||||
override val rpcUsers: List<User>,
|
override val rpcUsers: List<User>,
|
||||||
override val security : SecurityConfiguration? = null,
|
override val security : SecurityConfiguration? = null,
|
||||||
override val verifierType: VerifierType,
|
override val verifierType: VerifierType,
|
||||||
// TODO typesafe config supports the notion of durations. Make use of that by mapping it to java.time.Duration.
|
override val p2pMessagingRetry: P2PMessagingRetryConfiguration,
|
||||||
// Then rename this to messageRedeliveryDelay and make it of type Duration
|
|
||||||
override val messageRedeliveryDelaySeconds: Int = 30,
|
|
||||||
override val p2pAddress: NetworkHostAndPort,
|
override val p2pAddress: NetworkHostAndPort,
|
||||||
private val rpcAddress: NetworkHostAndPort? = null,
|
private val rpcAddress: NetworkHostAndPort? = null,
|
||||||
private val rpcSettings: NodeRpcSettings,
|
private val rpcSettings: NodeRpcSettings,
|
||||||
@ -337,4 +347,4 @@ data class SecurityConfiguration(val authService: SecurityConfiguration.AuthServ
|
|||||||
id = AuthServiceId("NODE_CONFIG"))
|
id = AuthServiceId("NODE_CONFIG"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -103,7 +103,6 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
|||||||
) : SingletonSerializeAsToken(), MessagingService, AddressToArtemisQueueResolver, AutoCloseable {
|
) : SingletonSerializeAsToken(), MessagingService, AddressToArtemisQueueResolver, AutoCloseable {
|
||||||
companion object {
|
companion object {
|
||||||
private val log = contextLogger()
|
private val log = contextLogger()
|
||||||
private const val messageMaxRetryCount: Int = 3
|
|
||||||
|
|
||||||
fun createMessageToRedeliver(): PersistentMap<Long, Pair<Message, MessageRecipients>, RetryMessage, Long> {
|
fun createMessageToRedeliver(): PersistentMap<Long, Pair<Message, MessageRecipients>, RetryMessage, Long> {
|
||||||
return PersistentMap(
|
return PersistentMap(
|
||||||
@ -131,6 +130,9 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private val messageMaxRetryCount: Int = config.p2pMessagingRetry.maxRetryCount
|
||||||
|
private val backoffBase: Double = config.p2pMessagingRetry.backoffBase
|
||||||
|
|
||||||
private class InnerState {
|
private class InnerState {
|
||||||
var started = false
|
var started = false
|
||||||
var running = false
|
var running = false
|
||||||
@ -156,7 +158,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
|||||||
data class HandlerRegistration(val topic: String, val callback: Any) : MessageHandlerRegistration
|
data class HandlerRegistration(val topic: String, val callback: Any) : MessageHandlerRegistration
|
||||||
|
|
||||||
override val myAddress: SingleMessageRecipient = NodeAddress(myIdentity, advertisedAddress)
|
override val myAddress: SingleMessageRecipient = NodeAddress(myIdentity, advertisedAddress)
|
||||||
private val messageRedeliveryDelaySeconds = config.messageRedeliveryDelaySeconds.toLong()
|
private val messageRedeliveryDelaySeconds = config.p2pMessagingRetry.messageRedeliveryDelay.seconds
|
||||||
private val state = ThreadBox(InnerState())
|
private val state = ThreadBox(InnerState())
|
||||||
private val knownQueues = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>())
|
private val knownQueues = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>())
|
||||||
|
|
||||||
@ -526,7 +528,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
|
|||||||
|
|
||||||
scheduledMessageRedeliveries[retryId] = nodeExecutor.schedule({
|
scheduledMessageRedeliveries[retryId] = nodeExecutor.schedule({
|
||||||
sendWithRetry(retryCount + 1, message, target, retryId)
|
sendWithRetry(retryCount + 1, message, target, retryId)
|
||||||
}, messageRedeliveryDelaySeconds, TimeUnit.SECONDS)
|
},messageRedeliveryDelaySeconds * Math.pow(backoffBase, retryCount.toDouble()).toLong(), TimeUnit.SECONDS)
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun cancelRedelivery(retryId: Long) {
|
override fun cancelRedelivery(retryId: Long) {
|
||||||
|
@ -19,4 +19,9 @@ verifierType = InMemory
|
|||||||
rpcSettings = {
|
rpcSettings = {
|
||||||
useSsl = false
|
useSsl = false
|
||||||
standAloneBroker = false
|
standAloneBroker = false
|
||||||
}
|
}
|
||||||
|
p2pMessagingRetry {
|
||||||
|
messageRedeliveryDelay = 30 seconds
|
||||||
|
maxRetryCount = 3
|
||||||
|
backoffBase = 2.0
|
||||||
|
}
|
||||||
|
@ -2,6 +2,7 @@ package net.corda.node.services.config
|
|||||||
|
|
||||||
import net.corda.core.internal.div
|
import net.corda.core.internal.div
|
||||||
import net.corda.core.utilities.NetworkHostAndPort
|
import net.corda.core.utilities.NetworkHostAndPort
|
||||||
|
import net.corda.core.utilities.seconds
|
||||||
import net.corda.testing.core.ALICE_NAME
|
import net.corda.testing.core.ALICE_NAME
|
||||||
import net.corda.testing.node.MockServices.Companion.makeTestDataSourceProperties
|
import net.corda.testing.node.MockServices.Companion.makeTestDataSourceProperties
|
||||||
import net.corda.tools.shell.SSHDConfiguration
|
import net.corda.tools.shell.SSHDConfiguration
|
||||||
@ -73,6 +74,7 @@ class NodeConfigurationImplTest {
|
|||||||
verifierType = VerifierType.InMemory,
|
verifierType = VerifierType.InMemory,
|
||||||
p2pAddress = NetworkHostAndPort("localhost", 0),
|
p2pAddress = NetworkHostAndPort("localhost", 0),
|
||||||
messagingServerAddress = null,
|
messagingServerAddress = null,
|
||||||
|
p2pMessagingRetry = P2PMessagingRetryConfiguration(5.seconds, 3, 1.0),
|
||||||
notary = null,
|
notary = null,
|
||||||
certificateChainCheckPolicies = emptyList(),
|
certificateChainCheckPolicies = emptyList(),
|
||||||
devMode = true,
|
devMode = true,
|
||||||
|
@ -4,9 +4,11 @@ import com.nhaarman.mockito_kotlin.doReturn
|
|||||||
import com.nhaarman.mockito_kotlin.whenever
|
import com.nhaarman.mockito_kotlin.whenever
|
||||||
import net.corda.core.crypto.generateKeyPair
|
import net.corda.core.crypto.generateKeyPair
|
||||||
import net.corda.core.utilities.NetworkHostAndPort
|
import net.corda.core.utilities.NetworkHostAndPort
|
||||||
|
import net.corda.core.utilities.seconds
|
||||||
import net.corda.node.internal.configureDatabase
|
import net.corda.node.internal.configureDatabase
|
||||||
import net.corda.node.services.config.CertChainPolicyConfig
|
import net.corda.node.services.config.CertChainPolicyConfig
|
||||||
import net.corda.node.services.config.NodeConfiguration
|
import net.corda.node.services.config.NodeConfiguration
|
||||||
|
import net.corda.node.services.config.P2PMessagingRetryConfiguration
|
||||||
import net.corda.node.services.config.configureWithDevSSLCertificate
|
import net.corda.node.services.config.configureWithDevSSLCertificate
|
||||||
import net.corda.node.services.network.NetworkMapCacheImpl
|
import net.corda.node.services.network.NetworkMapCacheImpl
|
||||||
import net.corda.node.services.network.PersistentNetworkMapCache
|
import net.corda.node.services.network.PersistentNetworkMapCache
|
||||||
@ -69,7 +71,7 @@ class ArtemisMessagingTest {
|
|||||||
doReturn(NetworkHostAndPort("0.0.0.0", serverPort)).whenever(it).p2pAddress
|
doReturn(NetworkHostAndPort("0.0.0.0", serverPort)).whenever(it).p2pAddress
|
||||||
doReturn(null).whenever(it).jmxMonitoringHttpPort
|
doReturn(null).whenever(it).jmxMonitoringHttpPort
|
||||||
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
|
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
|
||||||
doReturn(5).whenever(it).messageRedeliveryDelaySeconds
|
doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase=1.0)).whenever(it).p2pMessagingRetry
|
||||||
}
|
}
|
||||||
LogHelper.setLevel(PersistentUniquenessProvider::class)
|
LogHelper.setLevel(PersistentUniquenessProvider::class)
|
||||||
database = configureDatabase(makeTestDataSourceProperties(), DatabaseConfig(), rigorousMock())
|
database = configureDatabase(makeTestDataSourceProperties(), DatabaseConfig(), rigorousMock())
|
||||||
|
@ -463,7 +463,7 @@ private fun mockNodeConfiguration(): NodeConfiguration {
|
|||||||
doReturn(null).whenever(it).compatibilityZoneURL
|
doReturn(null).whenever(it).compatibilityZoneURL
|
||||||
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
|
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
|
||||||
doReturn(VerifierType.InMemory).whenever(it).verifierType
|
doReturn(VerifierType.InMemory).whenever(it).verifierType
|
||||||
doReturn(5).whenever(it).messageRedeliveryDelaySeconds
|
doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase = 1.0)).whenever(it).p2pMessagingRetry
|
||||||
doReturn(5.seconds.toMillis()).whenever(it).additionalNodeInfoPollingFrequencyMsec
|
doReturn(5.seconds.toMillis()).whenever(it).additionalNodeInfoPollingFrequencyMsec
|
||||||
doReturn(null).whenever(it).devModeOptions
|
doReturn(null).whenever(it).devModeOptions
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user