Add exponential backoff to P2P messaging retry (#2975)

This commit is contained in:
Thomas Schroeter 2018-04-26 13:58:41 +01:00 committed by GitHub
parent 5dc71fc350
commit 2ff3939e2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 43 additions and 12 deletions

View File

@ -86,6 +86,13 @@ absolute path to the node's base directory.
here must be externally accessible when running nodes across a cluster of machines. If the provided host is unreachable,
the node will try to auto-discover its public one.
:p2pMessagingRetry: Only used for notarisation requests. When the response doesn't arrive in time, the message is
resent to a different notary-replica round-robin in case of clustered notaries.
:messageRedeliveryDelay: The initial retry delay, e.g. `30 seconds`.
:maxRetryCount: How many retries to attempt.
:backoffBase: The base of the exponential backoff, :math:`t_{wait} = messageRedeliveryDelay * backoffBase^{retryCount}`.
:rpcAddress: The address of the RPC system on which RPC requests can be made to the node. If not provided then the node will run without RPC. This is now deprecated in favour of the ``rpcSettings`` block.
:rpcSettings: Options for the RPC server.

View File

@ -16,6 +16,7 @@ import java.net.Proxy
import java.net.URL
import java.nio.file.Path
import java.nio.file.Paths
import java.time.Duration
import java.time.Instant
import java.time.LocalDate
import java.time.temporal.Temporal
@ -104,6 +105,7 @@ private fun Config.getSingleValue(path: String, type: KType): Any? {
Double::class -> getDouble(path)
Boolean::class -> getBoolean(path)
LocalDate::class -> LocalDate.parse(getString(path))
Duration::class -> getDuration(path)
Instant::class -> Instant.parse(getString(path))
NetworkHostAndPort::class -> NetworkHostAndPort.parse(getString(path))
Path::class -> Paths.get(getString(path))

View File

@ -118,7 +118,8 @@ class P2PMessagingTest {
}
private fun DriverDSL.startAlice(): InProcess {
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("messageRedeliveryDelaySeconds" to 1))
return startNode(providedName = ALICE_NAME, customOverrides = mapOf("p2pMessagingRetry" to mapOf(
"messageRedeliveryDelay" to 1.seconds, "backoffBase" to 1.0, "maxRetryCount" to 3)))
.map { (it as InProcess) }
.getOrThrow()
}

View File

@ -35,7 +35,7 @@ interface NodeConfiguration : NodeSSLConfiguration {
val compatibilityZoneURL: URL?
val certificateChainCheckPolicies: List<CertChainPolicyConfig>
val verifierType: VerifierType
val messageRedeliveryDelaySeconds: Int
val p2pMessagingRetry: P2PMessagingRetryConfiguration
val notary: NotaryConfig?
val additionalNodeInfoPollingFrequencyMsec: Long
val p2pAddress: NetworkHostAndPort
@ -108,6 +108,18 @@ data class BFTSMaRtConfiguration(
}
}
/**
* Currently only used for notarisation requests.
*
* When the response doesn't arrive in time, the message is resent to a different notary-replica round-robin
* in case of clustered notaries.
*/
data class P2PMessagingRetryConfiguration(
val messageRedeliveryDelay: Duration,
val maxRetryCount: Int,
val backoffBase: Double
)
fun Config.parseAsNodeConfiguration(): NodeConfiguration = parseAs<NodeConfigurationImpl>()
data class NodeConfigurationImpl(
@ -123,9 +135,7 @@ data class NodeConfigurationImpl(
override val rpcUsers: List<User>,
override val security : SecurityConfiguration? = null,
override val verifierType: VerifierType,
// TODO typesafe config supports the notion of durations. Make use of that by mapping it to java.time.Duration.
// Then rename this to messageRedeliveryDelay and make it of type Duration
override val messageRedeliveryDelaySeconds: Int = 30,
override val p2pMessagingRetry: P2PMessagingRetryConfiguration,
override val p2pAddress: NetworkHostAndPort,
private val rpcAddress: NetworkHostAndPort? = null,
private val rpcSettings: NodeRpcSettings,
@ -337,4 +347,4 @@ data class SecurityConfiguration(val authService: SecurityConfiguration.AuthServ
id = AuthServiceId("NODE_CONFIG"))
}
}
}
}

View File

@ -103,7 +103,6 @@ class P2PMessagingClient(val config: NodeConfiguration,
) : SingletonSerializeAsToken(), MessagingService, AddressToArtemisQueueResolver, AutoCloseable {
companion object {
private val log = contextLogger()
private const val messageMaxRetryCount: Int = 3
fun createMessageToRedeliver(): PersistentMap<Long, Pair<Message, MessageRecipients>, RetryMessage, Long> {
return PersistentMap(
@ -131,6 +130,9 @@ class P2PMessagingClient(val config: NodeConfiguration,
}
}
private val messageMaxRetryCount: Int = config.p2pMessagingRetry.maxRetryCount
private val backoffBase: Double = config.p2pMessagingRetry.backoffBase
private class InnerState {
var started = false
var running = false
@ -156,7 +158,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
data class HandlerRegistration(val topic: String, val callback: Any) : MessageHandlerRegistration
override val myAddress: SingleMessageRecipient = NodeAddress(myIdentity, advertisedAddress)
private val messageRedeliveryDelaySeconds = config.messageRedeliveryDelaySeconds.toLong()
private val messageRedeliveryDelaySeconds = config.p2pMessagingRetry.messageRedeliveryDelay.seconds
private val state = ThreadBox(InnerState())
private val knownQueues = Collections.newSetFromMap(ConcurrentHashMap<String, Boolean>())
@ -526,7 +528,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
scheduledMessageRedeliveries[retryId] = nodeExecutor.schedule({
sendWithRetry(retryCount + 1, message, target, retryId)
}, messageRedeliveryDelaySeconds, TimeUnit.SECONDS)
},messageRedeliveryDelaySeconds * Math.pow(backoffBase, retryCount.toDouble()).toLong(), TimeUnit.SECONDS)
}
override fun cancelRedelivery(retryId: Long) {

View File

@ -19,4 +19,9 @@ verifierType = InMemory
rpcSettings = {
useSsl = false
standAloneBroker = false
}
}
p2pMessagingRetry {
messageRedeliveryDelay = 30 seconds
maxRetryCount = 3
backoffBase = 2.0
}

View File

@ -2,6 +2,7 @@ package net.corda.node.services.config
import net.corda.core.internal.div
import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.seconds
import net.corda.testing.core.ALICE_NAME
import net.corda.testing.node.MockServices.Companion.makeTestDataSourceProperties
import net.corda.tools.shell.SSHDConfiguration
@ -73,6 +74,7 @@ class NodeConfigurationImplTest {
verifierType = VerifierType.InMemory,
p2pAddress = NetworkHostAndPort("localhost", 0),
messagingServerAddress = null,
p2pMessagingRetry = P2PMessagingRetryConfiguration(5.seconds, 3, 1.0),
notary = null,
certificateChainCheckPolicies = emptyList(),
devMode = true,

View File

@ -4,9 +4,11 @@ import com.nhaarman.mockito_kotlin.doReturn
import com.nhaarman.mockito_kotlin.whenever
import net.corda.core.crypto.generateKeyPair
import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.seconds
import net.corda.node.internal.configureDatabase
import net.corda.node.services.config.CertChainPolicyConfig
import net.corda.node.services.config.NodeConfiguration
import net.corda.node.services.config.P2PMessagingRetryConfiguration
import net.corda.node.services.config.configureWithDevSSLCertificate
import net.corda.node.services.network.NetworkMapCacheImpl
import net.corda.node.services.network.PersistentNetworkMapCache
@ -69,7 +71,7 @@ class ArtemisMessagingTest {
doReturn(NetworkHostAndPort("0.0.0.0", serverPort)).whenever(it).p2pAddress
doReturn(null).whenever(it).jmxMonitoringHttpPort
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
doReturn(5).whenever(it).messageRedeliveryDelaySeconds
doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase=1.0)).whenever(it).p2pMessagingRetry
}
LogHelper.setLevel(PersistentUniquenessProvider::class)
database = configureDatabase(makeTestDataSourceProperties(), DatabaseConfig(), rigorousMock())

View File

@ -463,7 +463,7 @@ private fun mockNodeConfiguration(): NodeConfiguration {
doReturn(null).whenever(it).compatibilityZoneURL
doReturn(emptyList<CertChainPolicyConfig>()).whenever(it).certificateChainCheckPolicies
doReturn(VerifierType.InMemory).whenever(it).verifierType
doReturn(5).whenever(it).messageRedeliveryDelaySeconds
doReturn(P2PMessagingRetryConfiguration(5.seconds, 3, backoffBase = 1.0)).whenever(it).p2pMessagingRetry
doReturn(5.seconds.toMillis()).whenever(it).additionalNodeInfoPollingFrequencyMsec
doReturn(null).whenever(it).devModeOptions
}