mirror of
https://github.com/corda/corda.git
synced 2025-06-12 04:08:26 +00:00
[ENT-1289] Exit node if db down (#918)
* Exit if the db connection is lost
This commit is contained in:
@ -234,7 +234,11 @@ Mutual exclusion
|
|||||||
----------------
|
----------------
|
||||||
To avoid accidentally running all hot-cold nodes at the same time, a simple mechanism can be used by adding the following
|
To avoid accidentally running all hot-cold nodes at the same time, a simple mechanism can be used by adding the following
|
||||||
section to the configuration files. The mechanism is called *Mutual Exclusion* and it ensures that only one active node
|
section to the configuration files. The mechanism is called *Mutual Exclusion* and it ensures that only one active node
|
||||||
exists, all others will shut down shortly after starting. A standard configuration example is shown below:
|
exists, all others will shut down shortly after starting.
|
||||||
|
|
||||||
|
The *Mutual Exclusion* mechanism also acts as a database connection checker. A running node will acquire and periodically
|
||||||
|
update a mutual exclusion lease which is stored in the database. The node will exit if the database connection is lost.
|
||||||
|
A standard configuration example is shown below:
|
||||||
|
|
||||||
.. sourcecode:: none
|
.. sourcecode:: none
|
||||||
|
|
||||||
@ -253,7 +257,7 @@ exists, all others will shut down shortly after starting. A standard configurati
|
|||||||
used in the mutual exclusion process (signal which corda instance is active and using the database). Default value is the
|
used in the mutual exclusion process (signal which corda instance is active and using the database). Default value is the
|
||||||
machine's host name.
|
machine's host name.
|
||||||
|
|
||||||
:updateInterval: Period(milliseconds) over which the running node updates the mutual exclusion lease.
|
:updateInterval: Period (in milliseconds) over which the running node updates the mutual exclusion lease. The node will exit if the database connection is lost.
|
||||||
|
|
||||||
:waitInterval: Amount of time(milliseconds) to wait since last mutual exclusion lease update before being able to become
|
:waitInterval: Amount of time(milliseconds) to wait since last mutual exclusion lease update before being able to become
|
||||||
the active node. This has to be greater than updateInterval.
|
the active node. This has to be greater than updateInterval.
|
||||||
|
@ -17,6 +17,7 @@ import net.corda.nodeapi.internal.persistence.CordaPersistence
|
|||||||
import net.corda.nodeapi.internal.persistence.NODE_DATABASE_PREFIX
|
import net.corda.nodeapi.internal.persistence.NODE_DATABASE_PREFIX
|
||||||
import org.hibernate.Session
|
import org.hibernate.Session
|
||||||
import java.io.Serializable
|
import java.io.Serializable
|
||||||
|
import java.sql.SQLTransientConnectionException
|
||||||
import java.time.Duration
|
import java.time.Duration
|
||||||
import java.time.LocalDateTime
|
import java.time.LocalDateTime
|
||||||
import java.time.temporal.ChronoField
|
import java.time.temporal.ChronoField
|
||||||
@ -102,20 +103,25 @@ class RunOnceService(private val database: CordaPersistence, private val machine
|
|||||||
if (running.compareAndSet(false, true)) {
|
if (running.compareAndSet(false, true)) {
|
||||||
try {
|
try {
|
||||||
database.transaction {
|
database.transaction {
|
||||||
val mutualExclusion = getMutualExclusion(session)
|
try {
|
||||||
|
val mutualExclusion = getMutualExclusion(session)
|
||||||
|
|
||||||
if (mutualExclusion == null) {
|
if (mutualExclusion == null) {
|
||||||
log.error("$machineName PID: $pid failed mutual exclusion update. " +
|
log.error("$machineName PID: $pid failed mutual exclusion update. " +
|
||||||
"Expected to have a row in $TABLE table. " +
|
"Expected to have a row in $TABLE table. " +
|
||||||
"Check if another node is running")
|
"Check if another node is running")
|
||||||
System.exit(1)
|
System.exit(1)
|
||||||
} else if (mutualExclusion.machineName != machineName || mutualExclusion.pid != pid) {
|
} else if (mutualExclusion.machineName != machineName || mutualExclusion.pid != pid) {
|
||||||
log.error("Expected $machineName PID: $pid but was ${mutualExclusion.machineName} PID: ${mutualExclusion.pid}. " +
|
log.error("Expected $machineName PID: $pid but was ${mutualExclusion.machineName} PID: ${mutualExclusion.pid}. " +
|
||||||
"Check if another node is running")
|
"Check if another node is running")
|
||||||
|
System.exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
updateTimestamp(session, mutualExclusion!!)
|
||||||
|
} catch (exception: SQLTransientConnectionException) {
|
||||||
|
log.error("Database connection down", exception)
|
||||||
System.exit(1)
|
System.exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
updateTimestamp(session, mutualExclusion!!)
|
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
running.set(false)
|
running.set(false)
|
||||||
|
Reference in New Issue
Block a user