From 465915ac83b067da2d97f0b88cdb19d49172380b Mon Sep 17 00:00:00 2001 From: cburlinchon Date: Fri, 22 Jun 2018 14:57:43 +0100 Subject: [PATCH] [ENT-1289] Exit node if db down (#918) * Exit if the db connection is lost --- docs/source/hot-cold-deployment.rst | 8 ++++-- .../services/persistence/RunOnceService.kt | 28 +++++++++++-------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/docs/source/hot-cold-deployment.rst b/docs/source/hot-cold-deployment.rst index fbae7a15b7..f42d0d6c61 100644 --- a/docs/source/hot-cold-deployment.rst +++ b/docs/source/hot-cold-deployment.rst @@ -234,7 +234,11 @@ Mutual exclusion ---------------- To avoid accidentally running all hot-cold nodes at the same time, a simple mechanism can be used by adding the following section to the configuration files. The mechanism is called *Mutual Exclusion* and it ensures that only one active node -exists, all others will shut down shortly after starting. A standard configuration example is shown below: +exists, all others will shut down shortly after starting. + +The *Mutual Exclusion* mechanism also acts as a database connection checker. A running node will acquire and periodically +update a mutual exclusion lease which is stored in the database. The node will exit if the database connection is lost. +A standard configuration example is shown below: .. sourcecode:: none @@ -253,7 +257,7 @@ exists, all others will shut down shortly after starting. A standard configurati used in the mutual exclusion process (signal which corda instance is active and using the database). Default value is the machines host name. -:updateInterval: Period(milliseconds) over which the running node updates the mutual exclusion lease. +:updateInterval: Period(milliseconds) over which the running node updates the mutual exclusion lease. The node will exit if the database connection is lost. 
:waitInterval: Amount of time(milliseconds) to wait since last mutual exclusion lease update before being able to become the active node. This has to be greater than updateInterval. diff --git a/node/src/main/kotlin/net/corda/node/services/persistence/RunOnceService.kt b/node/src/main/kotlin/net/corda/node/services/persistence/RunOnceService.kt index 590ded135a..ab1ca72da1 100644 --- a/node/src/main/kotlin/net/corda/node/services/persistence/RunOnceService.kt +++ b/node/src/main/kotlin/net/corda/node/services/persistence/RunOnceService.kt @@ -17,6 +17,7 @@ import net.corda.nodeapi.internal.persistence.CordaPersistence import net.corda.nodeapi.internal.persistence.NODE_DATABASE_PREFIX import org.hibernate.Session import java.io.Serializable +import java.sql.SQLTransientConnectionException import java.time.Duration import java.time.LocalDateTime import java.time.temporal.ChronoField @@ -102,20 +103,25 @@ class RunOnceService(private val database: CordaPersistence, private val machine if (running.compareAndSet(false, true)) { try { database.transaction { - val mutualExclusion = getMutualExclusion(session) + try { + val mutualExclusion = getMutualExclusion(session) - if (mutualExclusion == null) { - log.error("$machineName PID: $pid failed mutual exclusion update. " + - "Expected to have a row in $TABLE table. " + - "Check if another node is running") - System.exit(1) - } else if (mutualExclusion.machineName != machineName || mutualExclusion.pid != pid) { - log.error("Expected $machineName PID: $pid but was ${mutualExclusion.machineName} PID: ${mutualExclusion.pid}. " + - "Check if another node is running") + if (mutualExclusion == null) { + log.error("$machineName PID: $pid failed mutual exclusion update. " + + "Expected to have a row in $TABLE table. 
" + + "Check if another node is running") + System.exit(1) + } else if (mutualExclusion.machineName != machineName || mutualExclusion.pid != pid) { + log.error("Expected $machineName PID: $pid but was ${mutualExclusion.machineName} PID: ${mutualExclusion.pid}. " + + "Check if another node is running") + System.exit(1) + } + + updateTimestamp(session, mutualExclusion!!) + } catch (exception: SQLTransientConnectionException) { + log.error("Database connection down", exception) System.exit(1) } - - updateTimestamp(session, mutualExclusion!!) } } finally { running.set(false)