[ENT-1289] Exit node if db down (#918)

* Exit if the db connection is lost
This commit is contained in:
cburlinchon
2018-06-22 14:57:43 +01:00
committed by GitHub
parent d0300261cb
commit 465915ac83
2 changed files with 23 additions and 13 deletions

View File

@ -234,7 +234,11 @@ Mutual exclusion
---------------- ----------------
To avoid accidentally running all hot-cold nodes at the same time, a simple mechanism can be used by adding the following To avoid accidentally running all hot-cold nodes at the same time, a simple mechanism can be used by adding the following
section to the configuration files. The mechanism is called *Mutual Exclusion* and it ensures that only one active node section to the configuration files. The mechanism is called *Mutual Exclusion* and it ensures that only one active node
exists, all others will shut down shortly after starting. A standard configuration example is shown below: exists, all others will shut down shortly after starting.
The *Mutual Exclusion* mechanism also acts as a database connection checker. A running node will acquire and periodically
update a mutual exclusion lease which is stored in the database. The node will exit if the database connection is lost.
A standard configuration example is shown below:
.. sourcecode:: none .. sourcecode:: none
@ -253,7 +257,7 @@ exists, all others will shut down shortly after starting. A standard configurati
used in the mutual exclusion process (signal which corda instance is active and using the database). Default value is the used in the mutual exclusion process (signal which corda instance is active and using the database). Default value is the
machines host name. machines host name.
:updateInterval: Period(milliseconds) over which the running node updates the mutual exclusion lease. :updateInterval: Period(milliseconds) over which the running node updates the mutual exclusion lease. The node will exit if the database connection is lost.
:waitInterval: Amount of time(milliseconds) to wait since last mutual exclusion lease update before being able to become :waitInterval: Amount of time(milliseconds) to wait since last mutual exclusion lease update before being able to become
the active node. This has to be greater than updateInterval. the active node. This has to be greater than updateInterval.

View File

@ -17,6 +17,7 @@ import net.corda.nodeapi.internal.persistence.CordaPersistence
import net.corda.nodeapi.internal.persistence.NODE_DATABASE_PREFIX import net.corda.nodeapi.internal.persistence.NODE_DATABASE_PREFIX
import org.hibernate.Session import org.hibernate.Session
import java.io.Serializable import java.io.Serializable
import java.sql.SQLTransientConnectionException
import java.time.Duration import java.time.Duration
import java.time.LocalDateTime import java.time.LocalDateTime
import java.time.temporal.ChronoField import java.time.temporal.ChronoField
@ -102,20 +103,25 @@ class RunOnceService(private val database: CordaPersistence, private val machine
if (running.compareAndSet(false, true)) { if (running.compareAndSet(false, true)) {
try { try {
database.transaction { database.transaction {
val mutualExclusion = getMutualExclusion(session) try {
val mutualExclusion = getMutualExclusion(session)
if (mutualExclusion == null) { if (mutualExclusion == null) {
log.error("$machineName PID: $pid failed mutual exclusion update. " + log.error("$machineName PID: $pid failed mutual exclusion update. " +
"Expected to have a row in $TABLE table. " + "Expected to have a row in $TABLE table. " +
"Check if another node is running") "Check if another node is running")
System.exit(1) System.exit(1)
} else if (mutualExclusion.machineName != machineName || mutualExclusion.pid != pid) { } else if (mutualExclusion.machineName != machineName || mutualExclusion.pid != pid) {
log.error("Expected $machineName PID: $pid but was ${mutualExclusion.machineName} PID: ${mutualExclusion.pid}. " + log.error("Expected $machineName PID: $pid but was ${mutualExclusion.machineName} PID: ${mutualExclusion.pid}. " +
"Check if another node is running") "Check if another node is running")
System.exit(1)
}
updateTimestamp(session, mutualExclusion!!)
} catch (exception: SQLTransientConnectionException) {
log.error("Database connection down", exception)
System.exit(1) System.exit(1)
} }
updateTimestamp(session, mutualExclusion!!)
} }
} finally { } finally {
running.set(false) running.set(false)