CORDA-3549: Re-write flaky CordaServiceLifecycleFatalTests (#5879)
* CORDA-3549: Improve stability of `CordaServiceLifecycleFatalTests`
* CORDA-3549: Bump-up reps count to ensure that test is definitely not flaky when executed by CI (once proved the number of reps will be reduced)
* CORDA-3549: Making Detekt happier
* CORDA-2942: Ensure `NodeLifecycleEventsDistributor` cleans-up smoothly when node shuts down
commit b72f71e7ac (parent c1909722f6)
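The re-written test (shown in the diff below) coordinates the out-of-process node and the test JVM through marker lines appended to a shared temp file (`readyToThrowMarker` / `goodToThrowMarker`). A minimal standalone sketch of that kind of file latch, assuming nothing beyond `java.io.File`; the helper names below are illustrative and not part of the commit:

```kotlin
import java.io.File

// Sketch of a cross-process "file latch", in the spirit of the
// readyToThrowMarker / goodToThrowMarker handshake used by the re-written test.
object FileLatch {
    // Append a marker line so the other process can observe it.
    fun signal(file: File, marker: String) {
        file.appendText("\n" + marker)
    }

    // Poll until the last line of the file equals the expected marker, or time out.
    fun awaitMarker(file: File, marker: String, timeoutMillis: Long = 30_000, pollMillis: Long = 100) {
        val deadline = System.currentTimeMillis() + timeoutMillis
        while (System.currentTimeMillis() < deadline) {
            val lines = file.takeIf { it.exists() }?.readLines().orEmpty()
            if (lines.isNotEmpty() && lines.last() == marker) return
            Thread.sleep(pollMillis)
        }
        error("Timed out waiting for marker '$marker' in ${file.absolutePath}")
    }
}
```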
@@ -231,6 +231,7 @@
<ID>ForEachOnRange:BFTSmartConfigTests.kt$BFTSmartConfigTests$forEach { n -> assertEquals(1, maxFaultyReplicas(n)) }</ID>
<ID>ForEachOnRange:BFTSmartConfigTests.kt$BFTSmartConfigTests$forEach { n -> assertEquals(2, maxFaultyReplicas(n)) }</ID>
<ID>ForEachOnRange:BFTSmartConfigTests.kt$BFTSmartConfigTests$forEach { n -> assertEquals(n, maxFaultyReplicas(n) + minCorrectReplicas(n)) }</ID>
<ID>ForEachOnRange:CordaServiceLifecycleFatalTests.kt$CordaServiceLifecycleFatalTests$forEach { logger.info("Rep #$it") // Scenario terminates JVM - node should be running out of process driver(DriverParameters(startNodesInProcess = false, cordappsForAllNodes = listOf(enclosedCordapp()), notarySpecs = emptyList(), systemProperties = mapOf(SECRET_PROPERTY_NAME to "true", tempFilePropertyName to tmpFile.absolutePath))) { val nodeHandle = startNode(providedName = ALICE_NAME).getOrThrow() val rpcInterface = nodeHandle.rpc eventually(duration = 60.seconds) { assertEquals(readyToThrowMarker, tmpFile.readLines().last()) } rpcInterface.protocolVersion tmpFile.appendText("\n" + goodToThrowMarker) // We signalled that it is good to throw which will eventually trigger node shutdown and RPC interface no longer working. eventually(duration = 30.seconds) { assertFailsWith(Exception::class) { try { rpcInterface.protocolVersion } catch (ex: Exception) { logger.info("Thrown as expected", ex) throw ex } } } } }</ID>
<ID>ForEachOnRange:HibernateConfigurationTest.kt$HibernateConfigurationTest$forEach { consumeCash(it.DOLLARS) }</ID>
<ID>ForEachOnRange:VaultQueryTests.kt$VaultQueryTestsBase$forEach { val newAllStates = vaultService.queryBy<DummyLinearContract.State>(sorting = sorting, criteria = criteria).states assertThat(newAllStates.groupBy(StateAndRef<*>::ref)).hasSameSizeAs(allStates) assertThat(newAllStates).containsExactlyElementsOf(allStates) }</ID>
<ID>ForEachOnRange:VaultQueryTests.kt$VaultQueryTestsBase$forEach { vaultFiller.fillWithSomeTestLinearStates(1, linearNumber = it.toLong(), linearString = it.toString()) }</ID>
@@ -2657,7 +2658,7 @@
<ID>MaxLineLength:NodeKeystoreCheckTest.kt$NodeKeystoreCheckTest$setPrivateKey(X509Utilities.CORDA_CLIENT_CA, nodeCA.keyPair.private, listOf(badNodeCACert, badRoot), signingCertStore.entryPassword)</ID>
<ID>MaxLineLength:NodeKeystoreCheckTest.kt$NodeKeystoreCheckTest$val badNodeCACert = X509Utilities.createCertificate(CertificateType.NODE_CA, badRoot, badRootKeyPair, ALICE_NAME.x500Principal, nodeCA.keyPair.public)</ID>
<ID>MaxLineLength:NodeKeystoreCheckTest.kt$NodeKeystoreCheckTest$val p2pSslConfig = CertificateStoreStubs.P2P.withCertificatesDirectory(certificatesDirectory, keyStorePassword = keystorePassword, trustStorePassword = keystorePassword)</ID>
<ID>MaxLineLength:NodeLifecycleEventsDistributor.kt$NodeLifecycleEventsDistributor</ID>
<ID>MaxLineLength:NodeLifecycleEventsDistributor.kt$NodeLifecycleEventsDistributor : Closeable</ID>
<ID>MaxLineLength:NodeMonitorModel.kt$NodeMonitorModel${ rpc = CordaRPCClient(nodeHostAndPort).start(username, password, GracefulReconnect()) proxyObservable.value = rpc.proxy // Vault snapshot (force single page load with MAX_PAGE_SIZE) + updates val ( statesSnapshot, vaultUpdates ) = rpc.proxy.vaultTrackBy<ContractState>( QueryCriteria.VaultQueryCriteria(Vault.StateStatus.ALL), PageSpecification(DEFAULT_PAGE_NUM, MAX_PAGE_SIZE) ) val unconsumedStates = statesSnapshot.states.filterIndexed { index, _ -> statesSnapshot.statesMetadata[index].status == Vault.StateStatus.UNCONSUMED }.toSet() val consumedStates = statesSnapshot.states.toSet() - unconsumedStates val initialVaultUpdate = Vault.Update(consumedStates, unconsumedStates, references = emptySet()) vaultUpdates.startWith(initialVaultUpdate).subscribe(vaultUpdatesSubject::onNext) // Transactions val (transactions, newTransactions) = @Suppress("DEPRECATION") rpc.proxy.internalVerifiedTransactionsFeed() newTransactions.startWith(transactions).subscribe(transactionsSubject::onNext) // SM -> TX mapping val (smTxMappings, futureSmTxMappings) = rpc.proxy.stateMachineRecordedTransactionMappingFeed() futureSmTxMappings.startWith(smTxMappings).subscribe(stateMachineTransactionMappingSubject::onNext) // Parties on network val (parties, futurePartyUpdate) = rpc.proxy.networkMapFeed() futurePartyUpdate.startWith(parties.map(MapChange::Added)).subscribe(networkMapSubject::onNext) val stateMachines = rpc.proxy.stateMachinesSnapshot() notaryIdentities = rpc.proxy.notaryIdentities() // Extract the flow tracking stream // TODO is there a nicer way of doing this? Stream of streams in general results in code like this... // TODO `progressTrackingSubject` doesn't seem to be used anymore - should it be removed? val currentProgressTrackerUpdates = stateMachines.mapNotNull { stateMachine -> ProgressTrackingEvent.createStreamFromStateMachineInfo(stateMachine) } val futureProgressTrackerUpdates = stateMachineUpdatesSubject.map { stateMachineUpdate -> if (stateMachineUpdate is StateMachineUpdate.Added) { ProgressTrackingEvent.createStreamFromStateMachineInfo(stateMachineUpdate.stateMachineInfo) ?: Observable.empty<ProgressTrackingEvent>() } else { Observable.empty<ProgressTrackingEvent>() } } // We need to retry, because when flow errors, we unsubscribe from progressTrackingSubject. So we end up with stream of state machine updates and no progress trackers. futureProgressTrackerUpdates.startWith(currentProgressTrackerUpdates).flatMap { it }.retry().subscribe(progressTrackingSubject) }</ID>
<ID>MaxLineLength:NodeNamedCache.kt$DefaultNamedCacheFactory$name.startsWith("RPCSecurityManagerShiroCache_") -> with(security?.authService?.options?.cache!!) { caffeine.maximumSize(maxEntries).expireAfterWrite(expireAfterSecs, TimeUnit.SECONDS) }</ID>
<ID>MaxLineLength:NodeNamedCache.kt$DefaultNamedCacheFactory$open</ID>
@@ -3961,6 +3962,7 @@
<ID>TooGenericExceptionCaught:CordaPersistence.kt$CordaPersistence$e: Exception</ID>
<ID>TooGenericExceptionCaught:CordaRPCClientTest.kt$CordaRPCClientTest$e: Exception</ID>
<ID>TooGenericExceptionCaught:CordaRPCOpsImpl.kt$CordaRPCOpsImpl$e: Exception</ID>
<ID>TooGenericExceptionCaught:CordaServiceLifecycleFatalTests.kt$CordaServiceLifecycleFatalTests$ex: Exception</ID>
<ID>TooGenericExceptionCaught:CryptoUtilsTest.kt$CryptoUtilsTest$e: Exception</ID>
<ID>TooGenericExceptionCaught:DBNetworkParametersStorage.kt$DBNetworkParametersStorage$e: Exception</ID>
<ID>TooGenericExceptionCaught:DataUploadServlet.kt$DataUploadServlet$e: RuntimeException</ID>
@@ -7,6 +7,7 @@ import net.corda.core.internal.concurrent.openFuture
import net.corda.core.node.services.CordaServiceCriticalFailureException
import net.corda.core.utilities.Try
import net.corda.core.utilities.contextLogger
import java.io.Closeable
import java.util.Collections.singleton
import java.util.LinkedList
import java.util.concurrent.Executors
@@ -23,7 +24,7 @@ import kotlin.system.exitProcess
*
* The class is safe for concurrent use from multiple threads.
*/
class NodeLifecycleEventsDistributor {
class NodeLifecycleEventsDistributor : Closeable {

companion object {
private val log = contextLogger()
@@ -43,7 +44,7 @@ class NodeLifecycleEventsDistributor {
private val readWriteLock: ReadWriteLock = ReentrantReadWriteLock()

private val executor = Executors.newSingleThreadExecutor(
ThreadFactoryBuilder().setNameFormat("NodeLifecycleEventsDistributor-%d").build())
ThreadFactoryBuilder().setNameFormat("NodeLifecycleEventsDistributor-%d").setDaemon(true).build())

/**
* Adds observer to the distribution list.
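The only functional change in the hunk above is `setDaemon(true)` on the Guava `ThreadFactoryBuilder`, so the distributor's single worker thread can no longer keep the JVM alive after the node stops. A minimal sketch of the same idea without Guava; the helper name is illustrative, not from the codebase:

```kotlin
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors

// Sketch only: a single-threaded executor whose worker is a daemon thread,
// so an idle or wedged dispatcher cannot block JVM exit.
fun singleDaemonThreadExecutor(namePrefix: String): ExecutorService =
    Executors.newSingleThreadExecutor { runnable ->
        Thread(runnable, "$namePrefix-worker").apply { isDaemon = true }
    }
```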
@@ -88,19 +89,24 @@ class NodeLifecycleEventsDistributor {

val result = openFuture<Any?>()

executor.execute {
val orderedSnapshot = if (event.reversedPriority) snapshot.reversed() else snapshot
orderedSnapshot.forEach {
log.debug("Distributing event $event to: $it")
val updateResult = it.update(event)
if (updateResult.isSuccess) {
log.debug("Event $event distribution outcome: $updateResult")
} else {
log.error("Failed to distribute event $event, failure outcome: $updateResult")
handlePossibleFatalTermination(event, updateResult as Try.Failure<String>)
}
}
if(executor.isShutdown || executor.isTerminated) {
log.warn("Not distributing $event as executor been already shutdown. Double close() case?")
result.set(null)
} else {
executor.execute {
val orderedSnapshot = if (event.reversedPriority) snapshot.reversed() else snapshot
orderedSnapshot.forEach {
log.debug("Distributing event $event to: $it")
val updateResult = it.update(event)
if (updateResult.isSuccess) {
log.debug("Event $event distribution outcome: $updateResult")
} else {
log.error("Failed to distribute event $event, failure outcome: $updateResult")
handlePossibleFatalTermination(event, updateResult as Try.Failure<String>)
}
}
result.set(null)
}
}
return result.map { }
}
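The hunk above guards `distributeEvent` against an executor that `close()` has already shut down: instead of submitting a task that would be rejected, it completes the returned future immediately. A standalone sketch of that guard using `CompletableFuture` and a plain `ExecutorService` rather than Corda's `openFuture`; the function name is an illustrative assumption:

```kotlin
import java.util.concurrent.CompletableFuture
import java.util.concurrent.ExecutorService

// Sketch: dispatch work on an executor, but degrade gracefully if the executor
// was already shut down (e.g. a late event racing with a double close()).
fun dispatchGuarded(executor: ExecutorService, work: () -> Unit): CompletableFuture<Unit> {
    val result = CompletableFuture<Unit>()
    if (executor.isShutdown || executor.isTerminated) {
        // Nothing will ever run the task; complete immediately so callers do not hang.
        result.complete(Unit)
    } else {
        executor.execute {
            try {
                work()
            } finally {
                result.complete(Unit)
            }
        }
    }
    return result
}
```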
@@ -114,6 +120,10 @@ class NodeLifecycleEventsDistributor {
}
}

override fun close() {
executor.shutdown()
}

/**
* Custom implementation vs. using [kotlin.concurrent.withLock] to allow interruption during lock acquisition.
*/
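The KDoc above explains why the class rolls its own lock helper instead of `kotlin.concurrent.withLock`: `lock()` cannot be interrupted while waiting, `lockInterruptibly()` can. A sketch of what such a helper can look like, not the actual Corda implementation:

```kotlin
import java.util.concurrent.locks.Lock

// Sketch: like kotlin.concurrent.withLock, but acquisition can be interrupted,
// so a thread blocked waiting for the lock responds to Thread.interrupt().
inline fun <T> Lock.withLockInterruptibly(action: () -> T): T {
    lockInterruptibly() // throws InterruptedException instead of waiting forever
    try {
        return action()
    } finally {
        unlock()
    }
}
```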
@@ -15,8 +15,6 @@ internal class NodeLifecycleEventsDistributorMultiThreadedTest {
private val logger = contextLogger()
}

private val instance = NodeLifecycleEventsDistributor()

private val addedCounter = AtomicLong()

private val eventsDeliveredCounter = AtomicLong()
@@ -24,29 +22,32 @@
@Test
fun addAndDistributeConcurrently() {

val initialObserversCount = 10
repeat(initialObserversCount) { instance.add(MyObserver(it)) }
NodeLifecycleEventsDistributor().use { instance ->

val operationsCount = 100_000
val event = NodeLifecycleEvent.BeforeNodeStart(mock())
val additionFreq = 1000
val distributionFutures = (1..operationsCount).stream(true).mapToObj {
if(it % additionFreq == 0) {
logger.debug("Adding observer")
instance.add(MyObserver(it))
addedCounter.incrementAndGet()
logger.info("Progress so far: $it")
val initialObserversCount = 10
repeat(initialObserversCount) { instance.add(MyObserver(it)) }

val operationsCount = 100_000
val event = NodeLifecycleEvent.BeforeNodeStart(mock())
val additionFreq = 1000
val distributionFutures = (1..operationsCount).stream(true).mapToObj {
if (it % additionFreq == 0) {
logger.debug("Adding observer")
instance.add(MyObserver(it))
addedCounter.incrementAndGet()
logger.info("Progress so far: $it")
}
logger.debug("Distributing event")
instance.distributeEvent(event)
}
logger.debug("Distributing event")
instance.distributeEvent(event)
}
distributionFutures.forEach { it.get() }
distributionFutures.forEach { it.get() }

with(eventsDeliveredCounter.get()) {
// Greater than original observers times events
assertTrue("$this") { this > initialObserversCount.toLong() * operationsCount }
// Less than ever added observers times events
assertTrue("$this") { this < (initialObserversCount.toLong() + addedCounter.get()) * operationsCount }
with(eventsDeliveredCounter.get()) {
// Greater than original observers times events
assertTrue("$this") { this > initialObserversCount.toLong() * operationsCount }
// Less than ever added observers times events
assertTrue("$this") { this < (initialObserversCount.toLong() + addedCounter.get()) * operationsCount }
}
}
}
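In the rewritten test above, the distributor is no longer a field; it is created inside `NodeLifecycleEventsDistributor().use { instance -> ... }`, so its executor is shut down via `close()` even when an assertion throws mid-test. Kotlin's `use` works for any `Closeable`; a tiny sketch of the guarantee it provides, with illustrative names:

```kotlin
import java.io.Closeable

class NoisyResource : Closeable {
    override fun close() = println("closed") // runs regardless of how the block exits
}

fun main() {
    // close() is still invoked even though the block throws.
    runCatching {
        NoisyResource().use { throw IllegalStateException("boom") }
    }
}
```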
@@ -6,59 +6,93 @@ import net.corda.core.node.services.CordaServiceCriticalFailureException
import net.corda.core.node.services.ServiceLifecycleEvent
import net.corda.core.node.services.ServiceLifecycleObserver
import net.corda.core.serialization.SingletonSerializeAsToken
import net.corda.core.utilities.contextLogger
import net.corda.core.utilities.getOrThrow
import net.corda.core.utilities.seconds
import net.corda.testing.common.internal.eventually
import net.corda.testing.core.ALICE_NAME
import net.corda.testing.driver.DriverParameters
import net.corda.testing.driver.driver
import net.corda.testing.node.internal.ListenProcessDeathException
import net.corda.testing.node.internal.enclosedCordapp
import org.junit.Test
import java.io.File
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith

class CordaServiceLifecycleFatalTests {

companion object {

private val logger = contextLogger()

// It is important to disarm throwing of the exception as unfortunately this service may be packaged to many
// test cordapps, e.g. the one used by [net.corda.node.CordappScanningDriverTest]
// If service remains "armed" to throw exceptions this will fail node start-up sequence.
// The problem is caused by the fact that test from `net.corda.node` package also hoovers all the sub-packages.
// Since this is done as a separate process, the trigger is passed through the system property.
const val SECRET_PROPERTY_NAME = "CordaServiceLifecycleFatalTests.armed"
private val SECRET_PROPERTY_NAME = this::class.java.enclosingClass.name + "-armed"

// Temporary file used as a latch between two processes
private val tempFilePropertyName = this::class.java.enclosingClass.name + "-tmpFile"
private val tmpFile = createTempFile(prefix = tempFilePropertyName)
private const val readyToThrowMarker = "ReadyToThrow"
private const val goodToThrowMarker = "GoodToThrow"

@CordaService
@Suppress("unused")
class FatalService(services: AppServiceHub) : SingletonSerializeAsToken() {

init {
services.register(observer = FailingObserver)
}

fun computeLength(text: String): Int {
require(text.isNotEmpty()) { "Length must be at least 1." }
return text.length
if(java.lang.Boolean.getBoolean(SECRET_PROPERTY_NAME)) {
services.register(observer = FailingObserver)
}
}
}

object FailingObserver : ServiceLifecycleObserver {
override fun onServiceLifecycleEvent(event: ServiceLifecycleEvent) {
if(java.lang.Boolean.getBoolean(SECRET_PROPERTY_NAME)) {
throw CordaServiceCriticalFailureException("failure")
val tmpFile = File(System.getProperty(tempFilePropertyName))
tmpFile.appendText("\n" + readyToThrowMarker)
eventually(duration = 30.seconds) {
assertEquals(goodToThrowMarker, tmpFile.readLines().last())
}
throw CordaServiceCriticalFailureException("controlled failure")
}
}
}

@Test
fun `JVM terminates on critical failure`() {
// Scenario terminates JVM - node should be running out of process
driver(DriverParameters(startNodesInProcess = false, cordappsForAllNodes = listOf(enclosedCordapp()),
notarySpecs = emptyList(), systemProperties = mapOf(Pair(SECRET_PROPERTY_NAME, "true")))) {
val nodeHandle = startNode(providedName = ALICE_NAME)
// Ensure ample time for all the start-up lifecycle events to be processed
Thread.sleep(2000)
assertFailsWith(ListenProcessDeathException::class) {
nodeHandle.getOrThrow()
(1..20).forEach {

logger.info("Rep #$it")

// Scenario terminates JVM - node should be running out of process
driver(DriverParameters(startNodesInProcess = false, cordappsForAllNodes = listOf(enclosedCordapp()),
notarySpecs = emptyList(),
systemProperties = mapOf(SECRET_PROPERTY_NAME to "true", tempFilePropertyName to tmpFile.absolutePath))) {
val nodeHandle = startNode(providedName = ALICE_NAME).getOrThrow()

val rpcInterface = nodeHandle.rpc
eventually(duration = 60.seconds) {
assertEquals(readyToThrowMarker, tmpFile.readLines().last())
}

rpcInterface.protocolVersion

tmpFile.appendText("\n" + goodToThrowMarker)

// We signalled that it is good to throw which will eventually trigger node shutdown and RPC interface no longer working.
eventually(duration = 30.seconds) {
assertFailsWith(Exception::class) {
try {
rpcInterface.protocolVersion
} catch (ex: Exception) {
logger.info("Thrown as expected", ex)
throw ex
}
}
}
}
}
}
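Both sides of the handshake above lean on `net.corda.testing.common.internal.eventually(duration)` to retry an assertion until it passes or the duration elapses. A standalone sketch of that kind of retry helper; this is an illustration under assumed semantics, not Corda's implementation, and the name is deliberately different:

```kotlin
import java.time.Duration

// Sketch of an "eventually"-style assertion helper: re-run the block until it
// stops throwing, or rethrow the last failure once the deadline passes.
fun <T> eventuallySketch(duration: Duration, pollMillis: Long = 100, block: () -> T): T {
    val deadline = System.nanoTime() + duration.toNanos()
    var last: Throwable? = null
    do {
        try {
            return block()
        } catch (t: Throwable) {
            last = t
        }
        Thread.sleep(pollMillis)
    } while (System.nanoTime() < deadline)
    throw AssertionError("Condition not met within $duration", last)
}
```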
@@ -954,7 +954,9 @@ abstract class AbstractNode<S>(val configuration: NodeConfiguration,
runOnStop.clear()
shutdownExecutor.shutdown()
_started = null
nodeLifecycleEventsDistributor.distributeEvent(NodeLifecycleEvent.AfterNodeStop(nodeServicesContext))
nodeLifecycleEventsDistributor.distributeEvent(NodeLifecycleEvent.AfterNodeStop(nodeServicesContext)).then {
nodeLifecycleEventsDistributor.close()
}
}

protected abstract fun makeMessagingService(): MessagingService
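In the node shutdown path above, `close()` is now chained onto the future returned by `distributeEvent`, so the distributor's executor is only shut down after the `AfterNodeStop` event has been dispatched. A standalone sketch of the same "run cleanup once the async work completes" pattern with `CompletableFuture`; the names are illustrative, not the Corda API:

```kotlin
import java.util.concurrent.CompletableFuture
import java.util.concurrent.ExecutorService

// Sketch: shut the executor down only once the final distribution has completed,
// whether it succeeded or failed.
fun shutdownAfter(distribution: CompletableFuture<Unit>, executor: ExecutorService) {
    distribution.whenComplete { _, _ -> executor.shutdown() }
}
```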