ENT-1729 P2P message de-duplication performance optimisation has a flaw (#675)

This commit is contained in:
Rick Parker 2018-04-04 15:49:00 +01:00 committed by GitHub
parent 6c2cfc3880
commit 23184b7495
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 121 additions and 53 deletions

View File

@ -11,42 +11,49 @@
package net.corda.node.services.messaging
import com.github.benmanes.caffeine.cache.Caffeine
import net.corda.core.crypto.SecureHash
import net.corda.core.identity.CordaX500Name
import net.corda.node.services.statemachine.DeduplicationId
import net.corda.node.utilities.AppendOnlyPersistentMap
import net.corda.nodeapi.internal.persistence.CordaPersistence
import net.corda.nodeapi.internal.persistence.NODE_DATABASE_PREFIX
import org.apache.mina.util.ConcurrentHashSet
import java.io.Serializable
import java.time.Instant
import java.util.*
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.TimeUnit
import javax.persistence.Column
import javax.persistence.Entity
import javax.persistence.Id
typealias SenderHashToSeqNo = Pair<String, Long?>
/**
* Encapsulate the de-duplication logic.
*/
class P2PMessageDeduplicator(private val database: CordaPersistence) {
val ourSenderUUID = UUID.randomUUID().toString()
// A temporary in-memory set of deduplication IDs. When we receive a message we don't persist the ID immediately,
// A temporary in-memory set of deduplication IDs and associated high water mark details.
// When we receive a message we don't persist the ID immediately,
// so we store the ID here in the meantime (until the persisting db tx has committed). This is because Artemis may
// redeliver messages to the same consumer if they weren't ACKed.
private val beingProcessedMessages = ConcurrentHashSet<DeduplicationId>()
private val beingProcessedMessages = ConcurrentHashMap<DeduplicationId, MessageMeta>()
private val processedMessages = createProcessedMessages()
// We add the peer to the key, so other peers cannot attempt malicious meddling with sequence numbers.
// Expire after 7 days since we last touched an entry, to avoid infinite growth.
private val senderUUIDSeqNoHWM: MutableMap<Triple<String, CordaX500Name, Boolean>, Long> = Caffeine.newBuilder().expireAfterAccess(7, TimeUnit.DAYS).build<Triple<String, CordaX500Name, Boolean>, Long>().asMap()
private val senderUUIDSeqNoHWM: MutableMap<SenderKey, SenderHashToSeqNo> = Caffeine.newBuilder().expireAfterAccess(7, TimeUnit.DAYS).build<SenderKey, SenderHashToSeqNo>().asMap()
private fun createProcessedMessages(): AppendOnlyPersistentMap<DeduplicationId, Instant, ProcessedMessage, String> {
private fun createProcessedMessages(): AppendOnlyPersistentMap<DeduplicationId, MessageMeta, ProcessedMessage, String> {
return AppendOnlyPersistentMap(
toPersistentEntityKey = { it.toString },
fromPersistentEntity = { Pair(DeduplicationId(it.id), it.insertionTime) },
toPersistentEntity = { key: DeduplicationId, value: Instant ->
fromPersistentEntity = { Pair(DeduplicationId(it.id), MessageMeta(it.insertionTime, it.hash, it.seqNo)) },
toPersistentEntity = { key: DeduplicationId, value: MessageMeta ->
ProcessedMessage().apply {
id = key.toString
insertionTime = value
insertionTime = value.insertionTime
hash = value.senderHash
seqNo = value.senderSeqNo
}
},
persistentEntityClass = ProcessedMessage::class.java
@ -67,24 +74,40 @@ class P2PMessageDeduplicator(private val database: CordaPersistence) {
* We also ensure the UUID cannot be spoofed, by incorporating the authenticated sender into the key of the map/cache.
*/
private fun isDuplicateWithPotentialOptimization(receivedSenderUUID: String, receivedSenderSeqNo: Long, msg: ReceivedMessage): Boolean {
return senderUUIDSeqNoHWM.compute(Triple(receivedSenderUUID, msg.peer, msg.isSessionInit)) { key, existingSeqNoHWM ->
val isNewHWM = (existingSeqNoHWM != null && existingSeqNoHWM < receivedSenderSeqNo)
if (isNewHWM) {
// If we are the new HWM, set the HWM to us.
receivedSenderSeqNo
} else {
// If we are a duplicate, unset the HWM, since it seems like re-delivery is happening for traffic from that sender.
// else if we are not a duplicate, (re)set the HWM to us.
if (isDuplicateInDatabase(msg)) null else receivedSenderSeqNo
val senderKey = SenderKey(receivedSenderUUID, msg.peer, msg.isSessionInit)
val (senderHash, existingSeqNoHWM) = senderUUIDSeqNoHWM.computeIfAbsent(senderKey) {
highestSeqNoHWMInDatabaseFor(senderKey)
}
} != receivedSenderSeqNo
val isNewHWM = (existingSeqNoHWM == null || existingSeqNoHWM < receivedSenderSeqNo)
return if (isNewHWM) {
senderUUIDSeqNoHWM[senderKey] = senderHash to receivedSenderSeqNo
false
} else isDuplicateInDatabase(msg)
}
/**
* Work out the highest sequence number for the given sender, as persisted last time we ran.
*
* TODO: consider the performance of doing this per sender vs. one big load at startup, vs. adding an index (and impact on inserts).
*/
private fun highestSeqNoHWMInDatabaseFor(senderKey: SenderKey): SenderHashToSeqNo {
val senderHash = senderHash(senderKey)
return senderHash to database.transaction {
val cb1 = session.criteriaBuilder
val cq1 = cb1.createQuery(Long::class.java)
val root = cq1.from(ProcessedMessage::class.java)
session.createQuery(cq1.select(cb1.max(root.get<Long>(ProcessedMessage::seqNo.name))).where(cb1.equal(root.get<String>(ProcessedMessage::hash.name), senderHash))).singleResult
}
}
// We need to incorporate the sending party, and the sessionInit flag as per the in-memory cache.
private fun senderHash(senderKey: SenderKey) = SecureHash.sha256(senderKey.peer.toString() + senderKey.isSessionInit.toString() + senderKey.senderUUID).toString()
/**
* @return true if we have seen this message before.
*/
fun isDuplicate(msg: ReceivedMessage): Boolean {
if (msg.uniqueMessageId in beingProcessedMessages) {
if (beingProcessedMessages.containsKey(msg.uniqueMessageId)) {
return true
}
val receivedSenderUUID = msg.senderUUID
@ -102,15 +125,20 @@ class P2PMessageDeduplicator(private val database: CordaPersistence) {
/**
* Called the first time we encounter [deduplicationId].
*/
fun signalMessageProcessStart(deduplicationId: DeduplicationId) {
beingProcessedMessages.add(deduplicationId)
fun signalMessageProcessStart(msg: ReceivedMessage) {
val receivedSenderUUID = msg.senderUUID
val receivedSenderSeqNo = msg.senderSeqNo
// We don't want a mix of nulls and values so we ensure that here.
val senderHash: String? = if (receivedSenderUUID != null && receivedSenderSeqNo != null) senderUUIDSeqNoHWM[SenderKey(receivedSenderUUID, msg.peer, msg.isSessionInit)]?.first else null
val senderSeqNo: Long? = if (senderHash != null) msg.senderSeqNo else null
beingProcessedMessages[msg.uniqueMessageId] = MessageMeta(Instant.now(), senderHash, senderSeqNo)
}
/**
* Called inside a DB transaction to persist [deduplicationId].
*/
fun persistDeduplicationId(deduplicationId: DeduplicationId) {
processedMessages[deduplicationId] = Instant.now()
processedMessages[deduplicationId] = beingProcessedMessages[deduplicationId]!!
}
/**
@ -129,6 +157,16 @@ class P2PMessageDeduplicator(private val database: CordaPersistence) {
var id: String = "",
@Column(name = "insertion_time")
var insertionTime: Instant = Instant.now()
)
var insertionTime: Instant = Instant.now(),
@Column(name = "sender", length = 64)
var hash: String? = "",
@Column(name = "sequence_number")
var seqNo: Long? = null
) : Serializable
private data class MessageMeta(val insertionTime: Instant, val senderHash: String?, val senderSeqNo: Long?)
private data class SenderKey(val senderUUID: String, val peer: CordaX500Name, val isSessionInit: Boolean)
}

View File

@ -27,11 +27,7 @@ import net.corda.core.serialization.SingletonSerializeAsToken
import net.corda.core.serialization.deserialize
import net.corda.core.serialization.internal.nodeSerializationEnv
import net.corda.core.serialization.serialize
import net.corda.core.utilities.ByteSequence
import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.OpaqueBytes
import net.corda.core.utilities.contextLogger
import net.corda.core.utilities.trace
import net.corda.core.utilities.*
import net.corda.node.VersionInfo
import net.corda.node.internal.LifecycleSupport
import net.corda.node.internal.artemis.ReactiveArtemisConsumer
@ -44,14 +40,11 @@ import net.corda.node.utilities.PersistentMap
import net.corda.nodeapi.ArtemisTcpTransport
import net.corda.nodeapi.ConnectionDirection
import net.corda.nodeapi.internal.ArtemisMessagingComponent
import net.corda.nodeapi.internal.ArtemisMessagingComponent.ArtemisAddress
import net.corda.nodeapi.internal.ArtemisMessagingComponent.*
import net.corda.nodeapi.internal.ArtemisMessagingComponent.Companion.BRIDGE_CONTROL
import net.corda.nodeapi.internal.ArtemisMessagingComponent.Companion.BRIDGE_NOTIFY
import net.corda.nodeapi.internal.ArtemisMessagingComponent.Companion.P2PMessagingHeaders
import net.corda.nodeapi.internal.ArtemisMessagingComponent.Companion.PEERS_PREFIX
import net.corda.nodeapi.internal.ArtemisMessagingComponent.NodeAddress
import net.corda.nodeapi.internal.ArtemisMessagingComponent.RemoteInboxAddress
import net.corda.nodeapi.internal.ArtemisMessagingComponent.ServiceAddress
import net.corda.nodeapi.internal.bridging.BridgeControl
import net.corda.nodeapi.internal.bridging.BridgeEntry
import net.corda.nodeapi.internal.persistence.CordaPersistence
@ -61,12 +54,7 @@ import org.apache.activemq.artemis.api.core.Message.HDR_DUPLICATE_DETECTION_ID
import org.apache.activemq.artemis.api.core.Message.HDR_VALIDATED_USER
import org.apache.activemq.artemis.api.core.RoutingType
import org.apache.activemq.artemis.api.core.SimpleString
import org.apache.activemq.artemis.api.core.client.ActiveMQClient
import org.apache.activemq.artemis.api.core.client.ClientConsumer
import org.apache.activemq.artemis.api.core.client.ClientMessage
import org.apache.activemq.artemis.api.core.client.ClientProducer
import org.apache.activemq.artemis.api.core.client.ClientSession
import org.apache.activemq.artemis.api.core.client.ServerLocator
import org.apache.activemq.artemis.api.core.client.*
import org.apache.commons.lang.ArrayUtils.EMPTY_BYTE_ARRAY
import rx.Observable
import rx.Subscription
@ -192,17 +180,6 @@ class P2PMessagingClient(val config: NodeConfiguration,
private val deduplicator = P2PMessageDeduplicator(database)
internal var messagingExecutor: MessagingExecutor? = null
@Entity
@javax.persistence.Table(name = "${NODE_DATABASE_PREFIX}message_ids")
class ProcessedMessage(
@Id
@Column(name = "message_id", length = 64)
var uuid: String = "",
@Column(name = "insertion_time")
var insertionTime: Instant = Instant.now()
) : Serializable
@Entity
@javax.persistence.Table(name = "${NODE_DATABASE_PREFIX}message_retry")
class RetryMessage(
@ -456,7 +433,7 @@ class P2PMessagingClient(val config: NodeConfiguration,
artemisToCordaMessage(artemisMessage)?.let { cordaMessage ->
if (!deduplicator.isDuplicate(cordaMessage)) {
deduplicator.signalMessageProcessStart(cordaMessage.uniqueMessageId)
deduplicator.signalMessageProcessStart(cordaMessage)
deliver(cordaMessage, artemisMessage)
} else {
log.trace { "Discard duplicate message ${cordaMessage.uniqueMessageId} for ${cordaMessage.topic}" }

View File

@ -29,4 +29,10 @@
</addColumn>
</changeSet>
<changeSet author="R3.Corda" id="add_dedup_columns">
<addColumn tableName="node_message_ids">
<column name="sender" type="NVARCHAR(64)"/>
<column name="sequence_number" type="BIGINT"/>
</addColumn>
</changeSet>
</databaseChangeLog>

View File

@ -191,6 +191,7 @@ class ArtemisMessagingTest {
try {
val messagingClient2 = createMessagingClient()
startNodeMessagingClient()
val fakeMsg2 = messagingClient2.messagingExecutor!!.cordaToArtemisMessage(message)
fakeMsg2!!.putStringProperty(HDR_VALIDATED_USER, SimpleString("O=Bank A, L=New York, C=US"))
@ -234,6 +235,51 @@ class ArtemisMessagingTest {
}
}
// Redelivery to a receiver who stops and restarts (some re-deliveries from Artemis, but with receiver state reset), but the original
// messages were recorded as consumed out of order, and only the *second* message was acked.
@Test
fun `re-receive from different client is not ignored when acked out of order`() {
// Don't ack first message, pretend we exit before that happens (but after second message is acked).
val (messagingClient1, receivedMessages) = createAndStartClientAndServer(dontAckCondition = { received -> String(received.data.bytes, Charsets.UTF_8) == "first msg" })
val message1 = messagingClient1.createMessage(TOPIC, data = "first msg".toByteArray())
val message2 = messagingClient1.createMessage(TOPIC, data = "second msg".toByteArray())
val fakeMsg1 = messagingClient1.messagingExecutor!!.cordaToArtemisMessage(message1)
val fakeMsg2 = messagingClient1.messagingExecutor!!.cordaToArtemisMessage(message2)
fakeMsg1!!.putStringProperty(HDR_VALIDATED_USER, SimpleString("O=Bank A, L=New York, C=US"))
fakeMsg2!!.putStringProperty(HDR_VALIDATED_USER, SimpleString("O=Bank A, L=New York, C=US"))
messagingClient1.deliver(fakeMsg1)
messagingClient1.deliver(fakeMsg2)
// Now change the receiver
try {
val messagingClient2 = createMessagingClient()
messagingClient2.addMessageHandler(TOPIC) { msg, _, handle ->
// The try-finally causes the test to fail if there's a duplicate insert (which, naturally, is an error but otherwise gets swallowed).
try {
database.transaction { handle.insideDatabaseTransaction() }
} finally {
handle.afterDatabaseTransaction() // We ACK first so that if it fails we won't get a duplicate in [receivedMessages]
receivedMessages.add(msg)
}
}
startNodeMessagingClient()
messagingClient2.deliver(fakeMsg1)
messagingClient2.deliver(fakeMsg2)
// Should receive 2 and then 1 (and not 2 again).
val received = receivedMessages.take()
assertThat(received.senderSeqNo).isEqualTo(1)
val received2 = receivedMessages.poll()
assertThat(received2.senderSeqNo).isEqualTo(0)
val received3 = receivedMessages.poll()
assertThat(received3).isNull()
} finally {
messagingClient1.stop()
}
}
// Re-receive on different client from re-started sender
@Test
fun `re-send from different client and re-receive from different client is ignored`() {
@ -274,13 +320,14 @@ class ArtemisMessagingTest {
messagingClient!!.start()
}
private fun createAndStartClientAndServer(platformVersion: Int = 1): Pair<P2PMessagingClient, BlockingQueue<ReceivedMessage>> {
private fun createAndStartClientAndServer(platformVersion: Int = 1, dontAckCondition: (msg: ReceivedMessage) -> Boolean = { false }): Pair<P2PMessagingClient, BlockingQueue<ReceivedMessage>> {
val receivedMessages = LinkedBlockingQueue<ReceivedMessage>()
createMessagingServer().start()
val messagingClient = createMessagingClient(platformVersion = platformVersion)
messagingClient.addMessageHandler(TOPIC) { message, _, handle ->
if (dontAckCondition(message)) return@addMessageHandler
database.transaction { handle.insideDatabaseTransaction() }
handle.afterDatabaseTransaction() // We ACK first so that if it fails we won't get a duplicate in [receivedMessages]
receivedMessages.add(message)