mirror of
https://github.com/corda/corda.git
synced 2025-06-18 07:08:15 +00:00
[CORDA-1879]: Ensure Node dies on unrecoverable errors. (#4213)
This commit is contained in:
committed by
GitHub
parent
ac23fcdf24
commit
dc62b20c5d
@ -493,8 +493,8 @@ abstract class AbstractNode<S>(val configuration: NodeConfiguration,
|
||||
val republishInterval = try {
|
||||
networkMapClient.publish(signedNodeInfo)
|
||||
heartbeatInterval
|
||||
} catch (t: Throwable) {
|
||||
log.warn("Error encountered while publishing node info, will retry again", t)
|
||||
} catch (e: Exception) {
|
||||
log.warn("Error encountered while publishing node info, will retry again", e)
|
||||
// TODO: Exponential backoff? It should reach max interval of eventHorizon/2.
|
||||
1.minutes
|
||||
}
|
||||
|
@ -333,7 +333,7 @@ open class Node(configuration: NodeConfiguration,
|
||||
log.info("Retrieved public IP from Network Map Service: $this. This will be used instead of the provided \"$host\" as the advertised address.")
|
||||
}
|
||||
retrievedHostName
|
||||
} catch (ignore: Throwable) {
|
||||
} catch (ignore: Exception) {
|
||||
// Cannot reach the network map service, ignore the exception and use provided P2P address instead.
|
||||
log.warn("Cannot connect to the network map service for public IP detection.")
|
||||
null
|
||||
|
@ -190,7 +190,7 @@ open class NodeStartup : NodeStartupLogging {
|
||||
node.startupComplete.then {
|
||||
try {
|
||||
InteractiveShell.runLocalShell(node::stop)
|
||||
} catch (e: Throwable) {
|
||||
} catch (e: Exception) {
|
||||
logger.error("Shell failed to start", e)
|
||||
}
|
||||
}
|
||||
|
@ -232,6 +232,7 @@ class RPCServer(
|
||||
log.error("Failed to send message, kicking client. Message was ${job.message}", throwable)
|
||||
serverControl!!.closeConsumerConnectionsForAddress(job.clientAddress.toString())
|
||||
invalidateClient(job.clientAddress)
|
||||
if (throwable is VirtualMachineError) throw throwable
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,8 +94,8 @@ class NetworkMapUpdater(private val networkMapCache: NetworkMapCacheInternal,
|
||||
override fun run() {
|
||||
val nextScheduleDelay = try {
|
||||
updateNetworkMapCache()
|
||||
} catch (t: Throwable) {
|
||||
logger.warn("Error encountered while updating network map, will retry in $defaultRetryInterval", t)
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Error encountered while updating network map, will retry in $defaultRetryInterval", e)
|
||||
defaultRetryInterval
|
||||
}
|
||||
// Schedule the next update.
|
||||
|
@ -219,9 +219,13 @@ class FlowStateMachineImpl<R>(override val id: StateMachineRunId,
|
||||
val result = logic.call()
|
||||
suspend(FlowIORequest.WaitForSessionConfirmations, maySkipCheckpoint = true)
|
||||
Try.Success(result)
|
||||
} catch (throwable: Throwable) {
|
||||
logger.info("Flow threw exception... sending it to flow hospital", throwable)
|
||||
Try.Failure<R>(throwable)
|
||||
} catch (t: Throwable) {
|
||||
if(t is VirtualMachineError) {
|
||||
logger.error("Caught unrecoverable error from flow. Forcibly terminating the JVM, this might leave resources open, and most likely will.", t)
|
||||
Runtime.getRuntime().halt(1)
|
||||
}
|
||||
logger.info("Flow raised an error... sending it to flow hospital", t)
|
||||
Try.Failure<R>(t)
|
||||
}
|
||||
val softLocksId = if (hasSoftLockedStates) logic.runId.uuid else null
|
||||
val finalEvent = when (resultOrError) {
|
||||
@ -373,8 +377,8 @@ class FlowStateMachineImpl<R>(override val id: StateMachineRunId,
|
||||
maySkipCheckpoint = skipPersistingCheckpoint,
|
||||
fiber = this.checkpointSerialize(context = serializationContext.value)
|
||||
)
|
||||
} catch (throwable: Throwable) {
|
||||
Event.Error(throwable)
|
||||
} catch (exception: Exception) {
|
||||
Event.Error(exception)
|
||||
}
|
||||
|
||||
// We must commit the database transaction before returning from this closure otherwise Quasar may schedule
|
||||
|
@ -43,6 +43,7 @@ import net.corda.nodeapi.internal.persistence.wrapWithDatabaseTransaction
|
||||
import net.corda.serialization.internal.CheckpointSerializeAsTokenContextImpl
|
||||
import net.corda.serialization.internal.withTokenContext
|
||||
import org.apache.activemq.artemis.utils.ReusableLatch
|
||||
import org.apache.logging.log4j.LogManager
|
||||
import rx.Observable
|
||||
import rx.subjects.PublishSubject
|
||||
import java.security.SecureRandom
|
||||
@ -135,7 +136,13 @@ class SingleThreadedStateMachineManager(
|
||||
val fibers = restoreFlowsFromCheckpoints()
|
||||
metrics.register("Flows.InFlight", Gauge<Int> { mutex.content.flows.size })
|
||||
Fiber.setDefaultUncaughtExceptionHandler { fiber, throwable ->
|
||||
(fiber as FlowStateMachineImpl<*>).logger.warn("Caught exception from flow", throwable)
|
||||
if (throwable is VirtualMachineError) {
|
||||
(fiber as FlowStateMachineImpl<*>).logger.error("Caught unrecoverable error from flow. Forcibly terminating the JVM, this might leave resources open, and most likely will.", throwable)
|
||||
LogManager.shutdown(true)
|
||||
Runtime.getRuntime().halt(1)
|
||||
} else {
|
||||
(fiber as FlowStateMachineImpl<*>).logger.warn("Caught exception from flow", throwable)
|
||||
}
|
||||
}
|
||||
serviceHub.networkMapCache.nodeReady.then {
|
||||
logger.info("Node ready, info: ${serviceHub.myInfo}")
|
||||
@ -606,7 +613,7 @@ class SingleThreadedStateMachineManager(
|
||||
private fun deserializeCheckpoint(serializedCheckpoint: SerializedBytes<Checkpoint>): Checkpoint? {
|
||||
return try {
|
||||
serializedCheckpoint.checkpointDeserialize(context = checkpointSerializationContext!!)
|
||||
} catch (exception: Throwable) {
|
||||
} catch (exception: Exception) {
|
||||
logger.error("Encountered unrestorable checkpoint!", exception)
|
||||
null
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ class TransitionExecutorImpl(
|
||||
for (action in transition.actions) {
|
||||
try {
|
||||
actionExecutor.executeAction(fiber, action)
|
||||
} catch (exception: Throwable) {
|
||||
} catch (exception: Exception) {
|
||||
contextTransactionOrNull?.close()
|
||||
if (transition.newState.checkpoint.errorState is ErrorState.Errored) {
|
||||
// If we errored while transitioning to an error state then we cannot record the additional
|
||||
|
@ -77,8 +77,8 @@ class FiberDeserializationChecker {
|
||||
is Job.Check -> {
|
||||
try {
|
||||
job.serializedFiber.checkpointDeserialize(context = checkpointSerializationContext)
|
||||
} catch (throwable: Throwable) {
|
||||
log.error("Encountered unrestorable checkpoint!", throwable)
|
||||
} catch (exception: Exception) {
|
||||
log.error("Encountered unrestorable checkpoint!", exception)
|
||||
foundUnrestorableFibers = true
|
||||
}
|
||||
}
|
||||
|
@ -276,7 +276,7 @@ class NodeVaultServiceTest {
|
||||
assertThat(vaultService.queryBy<Cash.State>(criteriaByLockId1).states).hasSize(3)
|
||||
}
|
||||
println("SOFT LOCK STATES #1 succeeded")
|
||||
} catch (e: Throwable) {
|
||||
} catch (e: Exception) {
|
||||
println("SOFT LOCK STATES #1 failed")
|
||||
} finally {
|
||||
countDown.countDown()
|
||||
@ -292,7 +292,7 @@ class NodeVaultServiceTest {
|
||||
assertThat(vaultService.queryBy<Cash.State>(criteriaByLockId2).states).hasSize(3)
|
||||
}
|
||||
println("SOFT LOCK STATES #2 succeeded")
|
||||
} catch (e: Throwable) {
|
||||
} catch (e: Exception) {
|
||||
println("SOFT LOCK STATES #2 failed")
|
||||
} finally {
|
||||
countDown.countDown()
|
||||
|
@ -327,7 +327,7 @@ class TLSAuthenticationTests {
|
||||
lock.notifyAll()
|
||||
}
|
||||
sslServerSocket.close()
|
||||
} catch (ex: Throwable) {
|
||||
} catch (ex: Exception) {
|
||||
serverError = true
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user