[CORDA-1879]: Ensure Node dies on unrecoverable errors. (#4213)

This commit is contained in:
Michele Sollecito
2018-11-12 15:56:04 +00:00
committed by GitHub
parent ac23fcdf24
commit dc62b20c5d
38 changed files with 83 additions and 282 deletions

View File

@ -493,8 +493,8 @@ abstract class AbstractNode<S>(val configuration: NodeConfiguration,
val republishInterval = try {
networkMapClient.publish(signedNodeInfo)
heartbeatInterval
} catch (t: Throwable) {
log.warn("Error encountered while publishing node info, will retry again", t)
} catch (e: Exception) {
log.warn("Error encountered while publishing node info, will retry again", e)
// TODO: Exponential backoff? It should reach max interval of eventHorizon/2.
1.minutes
}

View File

@ -333,7 +333,7 @@ open class Node(configuration: NodeConfiguration,
log.info("Retrieved public IP from Network Map Service: $this. This will be used instead of the provided \"$host\" as the advertised address.")
}
retrievedHostName
} catch (ignore: Throwable) {
} catch (ignore: Exception) {
// Cannot reach the network map service; ignore the exception and use the provided P2P address instead.
log.warn("Cannot connect to the network map service for public IP detection.")
null

View File

@ -190,7 +190,7 @@ open class NodeStartup : NodeStartupLogging {
node.startupComplete.then {
try {
InteractiveShell.runLocalShell(node::stop)
} catch (e: Throwable) {
} catch (e: Exception) {
logger.error("Shell failed to start", e)
}
}

View File

@ -232,6 +232,7 @@ class RPCServer(
log.error("Failed to send message, kicking client. Message was ${job.message}", throwable)
serverControl!!.closeConsumerConnectionsForAddress(job.clientAddress.toString())
invalidateClient(job.clientAddress)
if (throwable is VirtualMachineError) throw throwable
}
}

View File

@ -94,8 +94,8 @@ class NetworkMapUpdater(private val networkMapCache: NetworkMapCacheInternal,
override fun run() {
val nextScheduleDelay = try {
updateNetworkMapCache()
} catch (t: Throwable) {
logger.warn("Error encountered while updating network map, will retry in $defaultRetryInterval", t)
} catch (e: Exception) {
logger.warn("Error encountered while updating network map, will retry in $defaultRetryInterval", e)
defaultRetryInterval
}
// Schedule the next update.

View File

@ -219,9 +219,13 @@ class FlowStateMachineImpl<R>(override val id: StateMachineRunId,
val result = logic.call()
suspend(FlowIORequest.WaitForSessionConfirmations, maySkipCheckpoint = true)
Try.Success(result)
} catch (throwable: Throwable) {
logger.info("Flow threw exception... sending it to flow hospital", throwable)
Try.Failure<R>(throwable)
} catch (t: Throwable) {
if(t is VirtualMachineError) {
logger.error("Caught unrecoverable error from flow. Forcibly terminating the JVM, this might leave resources open, and most likely will.", t)
Runtime.getRuntime().halt(1)
}
logger.info("Flow raised an error... sending it to flow hospital", t)
Try.Failure<R>(t)
}
val softLocksId = if (hasSoftLockedStates) logic.runId.uuid else null
val finalEvent = when (resultOrError) {
@ -373,8 +377,8 @@ class FlowStateMachineImpl<R>(override val id: StateMachineRunId,
maySkipCheckpoint = skipPersistingCheckpoint,
fiber = this.checkpointSerialize(context = serializationContext.value)
)
} catch (throwable: Throwable) {
Event.Error(throwable)
} catch (exception: Exception) {
Event.Error(exception)
}
// We must commit the database transaction before returning from this closure otherwise Quasar may schedule

View File

@ -43,6 +43,7 @@ import net.corda.nodeapi.internal.persistence.wrapWithDatabaseTransaction
import net.corda.serialization.internal.CheckpointSerializeAsTokenContextImpl
import net.corda.serialization.internal.withTokenContext
import org.apache.activemq.artemis.utils.ReusableLatch
import org.apache.logging.log4j.LogManager
import rx.Observable
import rx.subjects.PublishSubject
import java.security.SecureRandom
@ -135,7 +136,13 @@ class SingleThreadedStateMachineManager(
val fibers = restoreFlowsFromCheckpoints()
metrics.register("Flows.InFlight", Gauge<Int> { mutex.content.flows.size })
Fiber.setDefaultUncaughtExceptionHandler { fiber, throwable ->
(fiber as FlowStateMachineImpl<*>).logger.warn("Caught exception from flow", throwable)
if (throwable is VirtualMachineError) {
(fiber as FlowStateMachineImpl<*>).logger.error("Caught unrecoverable error from flow. Forcibly terminating the JVM, this might leave resources open, and most likely will.", throwable)
LogManager.shutdown(true)
Runtime.getRuntime().halt(1)
} else {
(fiber as FlowStateMachineImpl<*>).logger.warn("Caught exception from flow", throwable)
}
}
serviceHub.networkMapCache.nodeReady.then {
logger.info("Node ready, info: ${serviceHub.myInfo}")
@ -606,7 +613,7 @@ class SingleThreadedStateMachineManager(
private fun deserializeCheckpoint(serializedCheckpoint: SerializedBytes<Checkpoint>): Checkpoint? {
return try {
serializedCheckpoint.checkpointDeserialize(context = checkpointSerializationContext!!)
} catch (exception: Throwable) {
} catch (exception: Exception) {
logger.error("Encountered unrestorable checkpoint!", exception)
null
}

View File

@ -39,7 +39,7 @@ class TransitionExecutorImpl(
for (action in transition.actions) {
try {
actionExecutor.executeAction(fiber, action)
} catch (exception: Throwable) {
} catch (exception: Exception) {
contextTransactionOrNull?.close()
if (transition.newState.checkpoint.errorState is ErrorState.Errored) {
// If we errored while transitioning to an error state then we cannot record the additional

View File

@ -77,8 +77,8 @@ class FiberDeserializationChecker {
is Job.Check -> {
try {
job.serializedFiber.checkpointDeserialize(context = checkpointSerializationContext)
} catch (throwable: Throwable) {
log.error("Encountered unrestorable checkpoint!", throwable)
} catch (exception: Exception) {
log.error("Encountered unrestorable checkpoint!", exception)
foundUnrestorableFibers = true
}
}

View File

@ -276,7 +276,7 @@ class NodeVaultServiceTest {
assertThat(vaultService.queryBy<Cash.State>(criteriaByLockId1).states).hasSize(3)
}
println("SOFT LOCK STATES #1 succeeded")
} catch (e: Throwable) {
} catch (e: Exception) {
println("SOFT LOCK STATES #1 failed")
} finally {
countDown.countDown()
@ -292,7 +292,7 @@ class NodeVaultServiceTest {
assertThat(vaultService.queryBy<Cash.State>(criteriaByLockId2).states).hasSize(3)
}
println("SOFT LOCK STATES #2 succeeded")
} catch (e: Throwable) {
} catch (e: Exception) {
println("SOFT LOCK STATES #2 failed")
} finally {
countDown.countDown()

View File

@ -327,7 +327,7 @@ class TLSAuthenticationTests {
lock.notifyAll()
}
sslServerSocket.close()
} catch (ex: Throwable) {
} catch (ex: Exception) {
serverError = true
}
}