mirror of
https://github.com/corda/corda.git
synced 2024-12-19 04:57:58 +00:00
[CORDA-2522] Follow up changes to error reporting around failed flows (#5006)
* Follow up changes to error reporting around failed flows
* Have FinalityDoctor report stack trace
* Revert changes to the DumpHistoryOnErrorInterceptor
This commit is contained in:
parent
bf66fa67ce
commit
2da597a5b7
@ -107,17 +107,17 @@ class StaffedFlowHospital(private val flowMessaging: FlowMessaging, private val
|
||||
val (outcome, event, backOffForChronicCondition) = when (report.diagnosis) {
|
||||
Diagnosis.DISCHARGE -> {
|
||||
val backOff = calculateBackOffForChronicCondition(report, medicalHistory, currentState)
|
||||
log.info("Flow ${flowFiber.id} error discharged from hospital (delay ${backOff.seconds}s) by ${report.by}")
|
||||
log.info("Flow error discharged from hospital (delay ${backOff.seconds}s) by ${report.by} (error was ${report.error.message})")
|
||||
Triple(Outcome.DISCHARGE, Event.RetryFlowFromSafePoint, backOff)
|
||||
}
|
||||
Diagnosis.OVERNIGHT_OBSERVATION -> {
|
||||
log.info("Flow ${flowFiber.id} error kept for overnight observation by ${report.by}")
|
||||
log.info("Flow error kept for overnight observation by ${report.by} (error was ${report.error.message})")
|
||||
// We don't schedule a next event for the flow - it will automatically retry from its checkpoint on node restart
|
||||
Triple(Outcome.OVERNIGHT_OBSERVATION, null, 0.seconds)
|
||||
}
|
||||
Diagnosis.NOT_MY_SPECIALTY -> {
|
||||
// None of the staff care for these errors so we let them propagate
|
||||
log.info("Flow ${flowFiber.id} error allowed to propagate")
|
||||
log.info("Flow error allowed to propagate", report.error)
|
||||
Triple(Outcome.UNTREATABLE, Event.StartErrorPropagation, 0.seconds)
|
||||
}
|
||||
}
|
||||
@ -161,7 +161,7 @@ class StaffedFlowHospital(private val flowMessaging: FlowMessaging, private val
|
||||
.asSequence()
|
||||
.mapIndexed { index, error ->
|
||||
// Rely on the logging context to print details of the flow ID.
|
||||
log.warn("Error ${index + 1} of ${errors.size}:", error)
|
||||
log.info("Error ${index + 1} of ${errors.size}:", error)
|
||||
val diagnoses: Map<Diagnosis, List<Staff>> = staff.groupBy { it.consult(flowFiber, currentState, error, medicalHistory) }
|
||||
// We're only interested in the highest priority diagnosis for the error
|
||||
val (diagnosis, by) = diagnoses.entries.minBy { it.key }!!
|
||||
@ -307,7 +307,7 @@ class StaffedFlowHospital(private val flowMessaging: FlowMessaging, private val
|
||||
override fun consult(flowFiber: FlowFiber, currentState: StateMachineState, newError: Throwable, history: FlowMedicalHistory): Diagnosis {
|
||||
return if (currentState.flowLogic is FinalityHandler || isFromReceiveFinalityFlow(newError)) {
|
||||
log.warn("Flow ${flowFiber.id} failed to be finalised. Manual intervention may be required before retrying " +
|
||||
"the flow by re-starting the node. State machine state: $currentState")
|
||||
"the flow by re-starting the node. State machine state: $currentState", newError)
|
||||
Diagnosis.OVERNIGHT_OBSERVATION
|
||||
} else {
|
||||
Diagnosis.NOT_MY_SPECIALTY
|
||||
|
@ -3,6 +3,7 @@ package net.corda.node.services.statemachine.interceptors
|
||||
import co.paralleluniverse.fibers.Suspendable
|
||||
import net.corda.core.flows.StateMachineRunId
|
||||
import net.corda.core.utilities.contextLogger
|
||||
import net.corda.core.utilities.debug
|
||||
import net.corda.node.services.statemachine.ActionExecutor
|
||||
import net.corda.node.services.statemachine.ErrorState
|
||||
import net.corda.node.services.statemachine.Event
|
||||
@ -42,9 +43,9 @@ class DumpHistoryOnErrorInterceptor(val delegate: TransitionExecutor) : Transiti
|
||||
// Only dump the transitions if we decide to propagate, not if the flow is just on its way to the hospital. Only log at debug level here - the flow transition
|
||||
// information is often unhelpful in the logs, and the actual cause of the problem will be logged elsewhere.
|
||||
if (nextState.checkpoint.errorState is ErrorState.Errored && nextState.checkpoint.errorState.propagating) {
|
||||
log.debug("Flow ${fiber.id} errored, dumping all transitions:\n${record!!.joinToString("\n")}")
|
||||
log.warn("Flow ${fiber.id} errored, dumping all transitions:\n${record!!.joinToString("\n")}")
|
||||
for (error in nextState.checkpoint.errorState.errors) {
|
||||
log.debug("Flow ${fiber.id} error", error.exception)
|
||||
log.warn("Flow ${fiber.id} error", error.exception)
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user