mirror of
https://github.com/corda/corda.git
synced 2024-12-18 20:47:57 +00:00
CORDA-1836: Checkpoints which cannot be deserialised no longer prevent the node from starting up (#5367)
Instead they are logged with the flow ID. The kill flow RPC has been updated to delete such checkpoints.
This commit is contained in:
parent
e2fcc1edd5
commit
39adfbb9ea
@ -133,7 +133,12 @@ class SingleThreadedStateMachineManager(
|
||||
override fun start(tokenizableServices: List<Any>) {
|
||||
checkQuasarJavaAgentPresence()
|
||||
val checkpointSerializationContext = CheckpointSerializationDefaults.CHECKPOINT_CONTEXT.withTokenContext(
|
||||
CheckpointSerializeAsTokenContextImpl(tokenizableServices, CheckpointSerializationDefaults.CHECKPOINT_SERIALIZER, CheckpointSerializationDefaults.CHECKPOINT_CONTEXT, serviceHub)
|
||||
CheckpointSerializeAsTokenContextImpl(
|
||||
tokenizableServices,
|
||||
CheckpointSerializationDefaults.CHECKPOINT_SERIALIZER,
|
||||
CheckpointSerializationDefaults.CHECKPOINT_CONTEXT,
|
||||
serviceHub
|
||||
)
|
||||
)
|
||||
this.checkpointSerializationContext = checkpointSerializationContext
|
||||
this.actionExecutor = makeActionExecutor(checkpointSerializationContext)
|
||||
@ -239,9 +244,10 @@ class SingleThreadedStateMachineManager(
|
||||
unfinishedFibers.countDown()
|
||||
}
|
||||
} else {
|
||||
// TODO replace with a clustered delete after we'll support clustered nodes
|
||||
logger.debug("Unable to kill a flow unknown to physical node. Might be processed by another physical node.")
|
||||
false
|
||||
// It may be that the id refers to a checkpoint that couldn't be deserialised into a flow, so we delete it if it exists.
|
||||
database.transaction {
|
||||
checkpointStorage.removeCheckpoint(id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -320,11 +326,9 @@ class SingleThreadedStateMachineManager(
|
||||
it.mapNotNull { (id, serializedCheckpoint) ->
|
||||
// If a flow is added before start() then don't attempt to restore it
|
||||
mutex.locked { if (id in flows) return@mapNotNull null }
|
||||
val checkpoint = deserializeCheckpoint(serializedCheckpoint) ?: return@mapNotNull null
|
||||
logger.debug { "Restored $checkpoint" }
|
||||
createFlowFromCheckpoint(
|
||||
id = id,
|
||||
checkpoint = checkpoint,
|
||||
serializedCheckpoint = serializedCheckpoint,
|
||||
initialDeduplicationHandler = null,
|
||||
isAnyCheckpointPersisted = true,
|
||||
isStartIdempotent = false
|
||||
@ -348,24 +352,21 @@ class SingleThreadedStateMachineManager(
|
||||
return
|
||||
}
|
||||
val flow = if (currentState.isAnyCheckpointPersisted) {
|
||||
// We intentionally grab the checkpoint from storage rather than relying on the one referenced by currentState. This is so that
|
||||
// we mirror exactly what happens when restarting the node.
|
||||
val serializedCheckpoint = checkpointStorage.getCheckpoint(flowId)
|
||||
if (serializedCheckpoint == null) {
|
||||
logger.error("Unable to find database checkpoint for flow $flowId. Something is very wrong. The flow will not retry.")
|
||||
return
|
||||
}
|
||||
val checkpoint = deserializeCheckpoint(serializedCheckpoint)
|
||||
if (checkpoint == null) {
|
||||
logger.error("Unable to deserialize database checkpoint for flow $flowId. Something is very wrong. The flow will not retry.")
|
||||
return
|
||||
}
|
||||
// Resurrect flow
|
||||
createFlowFromCheckpoint(
|
||||
id = flowId,
|
||||
checkpoint = checkpoint,
|
||||
serializedCheckpoint = serializedCheckpoint,
|
||||
initialDeduplicationHandler = null,
|
||||
isAnyCheckpointPersisted = true,
|
||||
isStartIdempotent = false
|
||||
)
|
||||
) ?: return
|
||||
} else {
|
||||
// Just flow initiation message
|
||||
null
|
||||
@ -656,15 +657,6 @@ class SingleThreadedStateMachineManager(
|
||||
}
|
||||
}
|
||||
|
||||
private fun deserializeCheckpoint(serializedCheckpoint: SerializedBytes<Checkpoint>): Checkpoint? {
|
||||
return try {
|
||||
serializedCheckpoint.checkpointDeserialize(context = checkpointSerializationContext!!)
|
||||
} catch (exception: Exception) {
|
||||
logger.error("Encountered unrestorable checkpoint!", exception)
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private fun verifyFlowLogicIsSuspendable(logic: FlowLogic<Any?>) {
|
||||
// Quasar requires (in Java 8) that at least the call method be annotated suspendable. Unfortunately, it's
|
||||
// easy to forget to add this when creating a new flow, so we check here to give the user a better error.
|
||||
@ -693,18 +685,28 @@ class SingleThreadedStateMachineManager(
|
||||
)
|
||||
}
|
||||
|
||||
private inline fun <reified T : Any> tryCheckpointDeserialize(bytes: SerializedBytes<T>, flowId: StateMachineRunId): T? {
|
||||
return try {
|
||||
bytes.checkpointDeserialize(context = checkpointSerializationContext!!)
|
||||
} catch (e: Exception) {
|
||||
logger.error("Unable to deserialize checkpoint for flow $flowId. Something is very wrong and this flow will be ignored.", e)
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private fun createFlowFromCheckpoint(
|
||||
id: StateMachineRunId,
|
||||
checkpoint: Checkpoint,
|
||||
serializedCheckpoint: SerializedBytes<Checkpoint>,
|
||||
isAnyCheckpointPersisted: Boolean,
|
||||
isStartIdempotent: Boolean,
|
||||
initialDeduplicationHandler: DeduplicationHandler?
|
||||
): Flow {
|
||||
): Flow? {
|
||||
val checkpoint = tryCheckpointDeserialize(serializedCheckpoint, id) ?: return null
|
||||
val flowState = checkpoint.flowState
|
||||
val resultFuture = openFuture<Any?>()
|
||||
val fiber = when (flowState) {
|
||||
is FlowState.Unstarted -> {
|
||||
val logic = flowState.frozenFlowLogic.checkpointDeserialize(context = checkpointSerializationContext!!)
|
||||
val logic = tryCheckpointDeserialize(flowState.frozenFlowLogic, id) ?: return null
|
||||
val state = StateMachineState(
|
||||
checkpoint = checkpoint,
|
||||
pendingDeduplicationHandlers = initialDeduplicationHandler?.let { listOf(it) } ?: emptyList(),
|
||||
@ -723,7 +725,7 @@ class SingleThreadedStateMachineManager(
|
||||
fiber
|
||||
}
|
||||
is FlowState.Started -> {
|
||||
val fiber = flowState.frozenFiber.checkpointDeserialize(context = checkpointSerializationContext!!)
|
||||
val fiber = tryCheckpointDeserialize(flowState.frozenFiber, id) ?: return null
|
||||
val state = StateMachineState(
|
||||
checkpoint = checkpoint,
|
||||
pendingDeduplicationHandlers = initialDeduplicationHandler?.let { listOf(it) } ?: emptyList(),
|
||||
|
Loading…
Reference in New Issue
Block a user