mirror of
https://github.com/corda/corda.git
synced 2025-02-21 01:42:24 +00:00
CORDA-2522 - Improve error reporting around failed flows (#5016)
* [CORDA-2522] Improve error reporting around failed flows (#5000) * Improve error reporting around failed flows * Fix an index to start from 1 when printed * Address first set of review comments (cherry picked from commit 24699cd7f499010f07d518381f1ea31b881311b3) * [CORDA-2522] Follow up changes to error reporting around failed flows (#5006) * Follow up changes to error reporting around failed flows * Have FinalityDoctor report stack trace * Revert changes to the DumpHistoryOnErrorInterceptor (cherry picked from commit 2da597a5b7744e62888d0c1594814454c2d6ef70)
This commit is contained in:
parent
0ba7b65ee8
commit
ee884a92de
@ -292,7 +292,7 @@ abstract class TransactionVerificationException(val txId: SecureHash, message: S
|
||||
class UntrustedAttachmentsException(val txId: SecureHash, val ids: List<SecureHash>) :
|
||||
CordaException("Attempting to load untrusted transaction attachments: $ids. " +
|
||||
"At this time these are not loadable because the DJVM sandbox has not yet been integrated. " +
|
||||
"You will need to install that app version yourself, to whitelist it for use. " +
|
||||
"You will need to manually install the CorDapp to whitelist it for use. " +
|
||||
"Please follow the operational steps outlined in https://docs.corda.net/cordapp-build-systems.html#cordapp-contract-attachments to learn more and continue.")
|
||||
|
||||
/*
|
||||
|
@ -125,8 +125,12 @@ class AttachmentsClassLoader(attachments: List<Attachment>,
|
||||
}
|
||||
.map(Attachment::id)
|
||||
|
||||
if (untrusted.isNotEmpty())
|
||||
if (untrusted.isNotEmpty()) {
|
||||
log.warn("Cannot verify transaction $sampleTxId as the following attachment IDs are untrusted: $untrusted." +
|
||||
"You will need to manually install the CorDapp to whitelist it for use. " +
|
||||
"Please follow the operational steps outlined in https://docs.corda.net/cordapp-build-systems.html#cordapp-contract-attachments to learn more and continue.")
|
||||
throw TransactionVerificationException.UntrustedAttachmentsException(sampleTxId, untrusted)
|
||||
}
|
||||
|
||||
// Enforce the no-overlap and package ownership rules.
|
||||
checkAttachments(attachments)
|
||||
|
@ -243,7 +243,7 @@ class FlowStateMachineImpl<R>(override val id: StateMachineRunId,
|
||||
if(t.isUnrecoverable()) {
|
||||
errorAndTerminate("Caught unrecoverable error from flow. Forcibly terminating the JVM, this might leave resources open, and most likely will.", t)
|
||||
}
|
||||
logger.info("Flow raised an error... sending it to flow hospital", t)
|
||||
logger.info("Flow raised an error: ${t.message}. Sending it to flow hospital to be triaged.")
|
||||
Try.Failure<R>(t)
|
||||
}
|
||||
val softLocksId = if (hasSoftLockedStates) logic.runId.uuid else null
|
||||
|
@ -107,17 +107,17 @@ class StaffedFlowHospital(private val flowMessaging: FlowMessaging, private val
|
||||
val (outcome, event, backOffForChronicCondition) = when (report.diagnosis) {
|
||||
Diagnosis.DISCHARGE -> {
|
||||
val backOff = calculateBackOffForChronicCondition(report, medicalHistory, currentState)
|
||||
log.info("Flow ${flowFiber.id} error discharged from hospital (delay ${backOff.seconds}s) by ${report.by}")
|
||||
log.info("Flow error discharged from hospital (delay ${backOff.seconds}s) by ${report.by} (error was ${report.error.message})")
|
||||
Triple(Outcome.DISCHARGE, Event.RetryFlowFromSafePoint, backOff)
|
||||
}
|
||||
Diagnosis.OVERNIGHT_OBSERVATION -> {
|
||||
log.info("Flow ${flowFiber.id} error kept for overnight observation by ${report.by}")
|
||||
log.info("Flow error kept for overnight observation by ${report.by} (error was ${report.error.message})")
|
||||
// We don't schedule a next event for the flow - it will automatically retry from its checkpoint on node restart
|
||||
Triple(Outcome.OVERNIGHT_OBSERVATION, null, 0.seconds)
|
||||
}
|
||||
Diagnosis.NOT_MY_SPECIALTY -> {
|
||||
// None of the staff care for these errors so we let them propagate
|
||||
log.info("Flow ${flowFiber.id} error allowed to propagate")
|
||||
log.info("Flow error allowed to propagate", report.error)
|
||||
Triple(Outcome.UNTREATABLE, Event.StartErrorPropagation, 0.seconds)
|
||||
}
|
||||
}
|
||||
@ -160,7 +160,8 @@ class StaffedFlowHospital(private val flowMessaging: FlowMessaging, private val
|
||||
return errors
|
||||
.asSequence()
|
||||
.mapIndexed { index, error ->
|
||||
log.info("Flow ${flowFiber.id} has error [$index]", error)
|
||||
// Rely on the logging context to print details of the flow ID.
|
||||
log.info("Error ${index + 1} of ${errors.size}:", error)
|
||||
val diagnoses: Map<Diagnosis, List<Staff>> = staff.groupBy { it.consult(flowFiber, currentState, error, medicalHistory) }
|
||||
// We're only interested in the highest priority diagnosis for the error
|
||||
val (diagnosis, by) = diagnoses.entries.minBy { it.key }!!
|
||||
@ -306,7 +307,7 @@ class StaffedFlowHospital(private val flowMessaging: FlowMessaging, private val
|
||||
override fun consult(flowFiber: FlowFiber, currentState: StateMachineState, newError: Throwable, history: FlowMedicalHistory): Diagnosis {
|
||||
return if (currentState.flowLogic is FinalityHandler || isFromReceiveFinalityFlow(newError)) {
|
||||
log.warn("Flow ${flowFiber.id} failed to be finalised. Manual intervention may be required before retrying " +
|
||||
"the flow by re-starting the node. State machine state: $currentState")
|
||||
"the flow by re-starting the node. State machine state: $currentState", newError)
|
||||
Diagnosis.OVERNIGHT_OBSERVATION
|
||||
} else {
|
||||
Diagnosis.NOT_MY_SPECIALTY
|
||||
|
@ -3,6 +3,7 @@ package net.corda.node.services.statemachine.interceptors
|
||||
import co.paralleluniverse.fibers.Suspendable
|
||||
import net.corda.core.flows.StateMachineRunId
|
||||
import net.corda.core.utilities.contextLogger
|
||||
import net.corda.core.utilities.debug
|
||||
import net.corda.node.services.statemachine.ActionExecutor
|
||||
import net.corda.node.services.statemachine.ErrorState
|
||||
import net.corda.node.services.statemachine.Event
|
||||
@ -39,7 +40,8 @@ class DumpHistoryOnErrorInterceptor(val delegate: TransitionExecutor) : Transiti
|
||||
(record ?: ArrayList()).apply { add(transitionRecord) }
|
||||
}
|
||||
|
||||
// Just if we decide to propagate, and not if just on the way to the hospital.
|
||||
// Dump the history only if we decide to propagate, not if the flow is merely on its way to the hospital. Only log at debug level here - the flow transition
|
||||
// information is often unhelpful in the logs, and the actual cause of the problem will be logged elsewhere.
|
||||
if (nextState.checkpoint.errorState is ErrorState.Errored && nextState.checkpoint.errorState.propagating) {
|
||||
log.warn("Flow ${fiber.id} errored, dumping all transitions:\n${record!!.joinToString("\n")}")
|
||||
for (error in nextState.checkpoint.errorState.errors) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user