mirror of
https://github.com/corda/corda.git
synced 2025-06-14 05:08:18 +00:00
Do not black-list AMQP targets that suffer a handshake failure
This commit is contained in:
@ -58,7 +58,7 @@ internal class AMQPChannelHandler(private val serverMode: Boolean,
|
||||
private var remoteCert: X509Certificate? = null
|
||||
private var eventProcessor: EventProcessor? = null
|
||||
private var suppressClose: Boolean = false
|
||||
private var badCert: Boolean = false
|
||||
private var connectionResult: ConnectionResult = ConnectionResult.NO_ERROR
|
||||
private var localCert: X509Certificate? = null
|
||||
private var requestedServerName: String? = null
|
||||
|
||||
@ -131,7 +131,7 @@ internal class AMQPChannelHandler(private val serverMode: Boolean,
|
||||
val ch = ctx.channel()
|
||||
logInfoWithMDC { "Closed client connection ${ch.id()} from $remoteAddress to ${ch.localAddress()}" }
|
||||
if (!suppressClose) {
|
||||
onClose(ch as SocketChannel, ConnectionChange(remoteAddress, remoteCert, false, badCert))
|
||||
onClose(ch as SocketChannel, ConnectionChange(remoteAddress, remoteCert, false, connectionResult))
|
||||
}
|
||||
eventProcessor?.close()
|
||||
ctx.fireChannelInactive()
|
||||
@ -274,13 +274,13 @@ internal class AMQPChannelHandler(private val serverMode: Boolean,
|
||||
val remoteX500Name = try {
|
||||
CordaX500Name.build(remoteCert!!.subjectX500Principal)
|
||||
} catch (ex: IllegalArgumentException) {
|
||||
badCert = true
|
||||
connectionResult = ConnectionResult.HANDSHAKE_FAILURE
|
||||
logErrorWithMDC("Certificate subject not a valid CordaX500Name", ex)
|
||||
ctx.close()
|
||||
return
|
||||
}
|
||||
if (allowedRemoteLegalNames != null && remoteX500Name !in allowedRemoteLegalNames) {
|
||||
badCert = true
|
||||
connectionResult = ConnectionResult.HANDSHAKE_FAILURE
|
||||
logErrorWithMDC("Provided certificate subject $remoteX500Name not in expected set $allowedRemoteLegalNames")
|
||||
ctx.close()
|
||||
return
|
||||
@ -288,7 +288,7 @@ internal class AMQPChannelHandler(private val serverMode: Boolean,
|
||||
|
||||
logInfoWithMDC { "Handshake completed with subject: $remoteX500Name, requested server name: ${sslHandler.getRequestedServerName()}." }
|
||||
createAMQPEngine(ctx)
|
||||
onOpen(ctx.channel() as SocketChannel, ConnectionChange(remoteAddress, remoteCert, connected = true, badCert = false))
|
||||
onOpen(ctx.channel() as SocketChannel, ConnectionChange(remoteAddress, remoteCert, connected = true, connectionResult = ConnectionResult.NO_ERROR))
|
||||
}
|
||||
|
||||
private fun handleFailedHandshake(ctx: ChannelHandlerContext, evt: SslHandshakeCompletionEvent) {
|
||||
@ -303,7 +303,7 @@ internal class AMQPChannelHandler(private val serverMode: Boolean,
|
||||
// io.netty.handler.ssl.SslHandler.setHandshakeFailureTransportFailure()
|
||||
cause is SSLException && (cause.message?.contains("writing TLS control frames") == true) -> logWarnWithMDC(cause.message!!)
|
||||
cause is SSLException && (cause.message?.contains("internal_error") == true) -> logWarnWithMDC("Received internal_error during handshake")
|
||||
else -> badCert = true
|
||||
else -> connectionResult = ConnectionResult.HANDSHAKE_FAILURE
|
||||
}
|
||||
logWarnWithMDC("Handshake failure: ${evt.cause().message}")
|
||||
if (log.isTraceEnabled) {
|
||||
|
@ -26,6 +26,7 @@ import rx.Observable
|
||||
import rx.subjects.PublishSubject
|
||||
import java.lang.Long.min
|
||||
import java.net.InetSocketAddress
|
||||
import java.time.Duration
|
||||
import java.util.concurrent.TimeUnit
|
||||
import java.util.concurrent.locks.ReentrantLock
|
||||
import javax.net.ssl.KeyManagerFactory
|
||||
@ -70,6 +71,7 @@ class AMQPClient(val targets: List<NetworkHostAndPort>,
|
||||
private const val MAX_RETRY_INTERVAL = 60000L
|
||||
private const val BACKOFF_MULTIPLIER = 2L
|
||||
private val NUM_CLIENT_THREADS = Integer.getInteger(CORDA_AMQP_NUM_CLIENT_THREAD_PROP_NAME, 2)
|
||||
private val handshakeRetryIntervals = List(5) { Duration.ofMinutes(5) }
|
||||
}
|
||||
|
||||
private val lock = ReentrantLock()
|
||||
@ -82,7 +84,9 @@ class AMQPClient(val targets: List<NetworkHostAndPort>,
|
||||
private var targetIndex = 0
|
||||
private var currentTarget: NetworkHostAndPort = targets.first()
|
||||
private var retryInterval = MIN_RETRY_INTERVAL
|
||||
private val badCertTargets = mutableSetOf<NetworkHostAndPort>()
|
||||
private val handshakeFailureRetryTargets = mutableSetOf<NetworkHostAndPort>()
|
||||
private var retryingHandshakeFailures = false
|
||||
private var retryOffset = 0
|
||||
@Volatile
|
||||
private var amqpActive = false
|
||||
@Volatile
|
||||
@ -91,22 +95,67 @@ class AMQPClient(val targets: List<NetworkHostAndPort>,
|
||||
val localAddressString: String
|
||||
get() = clientChannel?.localAddress()?.toString() ?: "<unknownLocalAddress>"
|
||||
|
||||
/**
 * Figure out the index of the next address to try to connect to.
 *
 * Searches [targets] round-robin, starting with the entry after the current [targetIndex] and wrapping
 * around, and selects the first target that is not currently listed in [handshakeFailureRetryTargets].
 * If every target is listed there (or [targets] is empty) [targetIndex] is left at -1, and the caller
 * must deal with that before indexing into [targets].
 */
private fun setTargetIndex() {
    val origIndex = targetIndex
    // Offsets start at 1 so the current target is only reconsidered after all the others.
    targetIndex = (1..targets.size)
            .asSequence()
            .map { offset -> (origIndex + offset).rem(targets.size) }
            .firstOrNull { candidate -> targets[candidate] !in handshakeFailureRetryTargets }
            ?: -1
}
|
||||
|
||||
/**
 * Set how long to wait until trying to connect to the next address, storing the result in [retryInterval].
 *
 * While [retryingHandshakeFailures] is set, the wait is taken from [handshakeRetryIntervals], one entry
 * per call (tracked by [retryOffset]); once those entries are used up the wait becomes one day.
 * Otherwise the current interval is multiplied by [BACKOFF_MULTIPLIER] and capped at [MAX_RETRY_INTERVAL].
 *
 * The handshake-retry interval must not be passed through the [MAX_RETRY_INTERVAL] cap afterwards,
 * otherwise the longer handshake back-off would never take effect.
 */
private fun setTargetRetryInterval() {
    retryInterval = if (retryingHandshakeFailures) {
        if (retryOffset < handshakeRetryIntervals.size) {
            // Consume the next configured handshake back-off step.
            handshakeRetryIntervals[retryOffset++].toMillis()
        } else {
            Duration.ofDays(1).toMillis()
        }
    } else {
        min(MAX_RETRY_INTERVAL, retryInterval * BACKOFF_MULTIPLIER)
    }
}
|
||||
|
||||
/**
 * Called once a connection has been made: puts all the retry-connection state back to its starting
 * values so that, should the connection fail again, this node tries to reconnect quickly.
 */
private fun successfullyConnected() {
    log.info("Successfully connected to [${targets[targetIndex]}]; resetting the target connection-retry interval")
    // The three resets are independent of each other.
    retryOffset = 0
    retryInterval = MIN_RETRY_INTERVAL
    retryingHandshakeFailures = false
}
|
||||
|
||||
/**
 * Set the next target to connect to and the delay before connecting to it.
 *
 * If [setTargetIndex] finds no eligible target, the targets recorded in [handshakeFailureRetryTargets]
 * are released for another round and [retryingHandshakeFailures] is switched on. If nothing was recorded
 * there either, an error is logged and the method returns with [targetIndex] still at -1.
 */
private fun nextTarget() {
    setTargetIndex()

    if (targetIndex == -1) {
        if (handshakeFailureRetryTargets.isEmpty()) {
            // Nothing left to fall back on.
            log.error("Attempted connection to targets: $targets, but none of them have presented acceptable certificates" +
                    " for $allowedRemoteLegalNames. Halting retries.")
            return
        }
        log.info("Failed to connect to any targets. Retrying targets that previously failed to handshake.")
        handshakeFailureRetryTargets.clear()
        retryingHandshakeFailures = true
        setTargetIndex()
    }

    setTargetRetryInterval()
    log.info("Retry connect to ${targets[targetIndex]} in [$retryInterval] ms")
}
|
||||
|
||||
private val connectListener = object : ChannelFutureListener {
|
||||
@ -212,7 +261,7 @@ class AMQPClient(val targets: List<NetworkHostAndPort>,
|
||||
onOpen = { _, change ->
|
||||
parent.run {
|
||||
amqpActive = true
|
||||
retryInterval = MIN_RETRY_INTERVAL // reset to fast reconnect if we connect properly
|
||||
successfullyConnected()
|
||||
_onConnection.onNext(change)
|
||||
}
|
||||
},
|
||||
@ -220,9 +269,9 @@ class AMQPClient(val targets: List<NetworkHostAndPort>,
|
||||
if (parent.amqpChannelHandler == amqpChannelHandler) {
|
||||
parent.run {
|
||||
_onConnection.onNext(change)
|
||||
if (change.badCert) {
|
||||
log.error("Blocking future connection attempts to $target due to bad certificate on endpoint")
|
||||
badCertTargets += target
|
||||
if (change.connectionResult == ConnectionResult.HANDSHAKE_FAILURE) {
|
||||
log.warn("Handshake failure with $target target; will retry later")
|
||||
handshakeFailureRetryTargets += target
|
||||
}
|
||||
|
||||
if (started && amqpActive) {
|
||||
|
@ -3,8 +3,8 @@ package net.corda.nodeapi.internal.protonwrapper.netty
|
||||
import java.net.InetSocketAddress
|
||||
import java.security.cert.X509Certificate
|
||||
|
||||
/**
 * Describes a change in the state of a connection to a remote peer.
 *
 * @property remoteAddress address of the remote end of the connection.
 * @property remoteCert certificate associated with the remote end, or null when none is available.
 * @property connected true when the change reports an opened connection, false when it reports a closed one.
 * @property connectionResult outcome recorded for the connection, e.g. [ConnectionResult.HANDSHAKE_FAILURE].
 */
data class ConnectionChange(val remoteAddress: InetSocketAddress, val remoteCert: X509Certificate?, val connected: Boolean, val connectionResult: ConnectionResult) {
    override fun toString(): String {
        return "ConnectionChange remoteAddress: $remoteAddress connected state: $connected cert subject: ${remoteCert?.subjectDN} result: $connectionResult"
    }
}
|
||||
|
@ -0,0 +1,6 @@
|
||||
package net.corda.nodeapi.internal.protonwrapper.netty
|
||||
|
||||
/**
 * Outcome of a connection, reported through `ConnectionChange.connectionResult`.
 */
enum class ConnectionResult {
    /** No error has been recorded for the connection. */
    NO_ERROR,

    /** The handshake with the remote peer failed or its certificate was rejected. */
    HANDSHAKE_FAILURE
}
|
Reference in New Issue
Block a user