Avoid BFT printStackTraces when cluster is starting up (#899)

Andrzej Cichocki 2017-06-30 10:52:24 +01:00 committed by GitHub
parent a08f701dc5
commit 88c8f4b351
2 changed files with 38 additions and 3 deletions
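In outline: a BFT-SMaRt replica printStackTraces repeatedly until the lower-numbered replicas it wants to talk to are listening, so the fix makes each replica wait for the previous replica's replica-to-replica port before constructing its own CordaServiceReplica. The sketch below shows that probe-and-wait pattern in isolation, with illustrative names only; the commit itself implements it via waitUntilReplicaWillNotPrintStackTrace and HostAndPort.isListening() in the second file.

    import java.io.IOException
    import java.net.InetSocketAddress
    import java.net.Socket
    import java.util.concurrent.TimeUnit.MILLISECONDS

    // Illustrative only: poll a TCP endpoint until something accepts a connection.
    fun waitUntilListening(host: String, port: Int, pollMillis: Long = 200) {
        while (true) {
            try {
                // A successful connect means the peer's server socket is up; close it straight away.
                Socket().use { it.connect(InetSocketAddress(host, port), 1000); return }
            } catch (e: IOException) {
                MILLISECONDS.sleep(pollMillis) // Not listening yet; try again shortly.
            }
        }
    }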


@@ -179,8 +179,11 @@ object BFTSMaRt {
    // TODO: Use Requery with proper DB schema instead of JDBCHashMap.
    // Must be initialised before ServiceReplica is started
    private val commitLog = services.database.transaction { JDBCHashMap<StateRef, UniquenessProvider.ConsumingTx>(tableName) }
-   @Suppress("LeakingThis")
-   private val replica = CordaServiceReplica(replicaId, config.path, this)
+   private val replica = run {
+       config.waitUntilReplicaWillNotPrintStackTrace(replicaId)
+       @Suppress("LeakingThis")
+       CordaServiceReplica(replicaId, config.path, this)
+   }

    fun dispose() {
        replica.dispose()
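The run { ... } wrapper above is what lets the wait happen before the replica is constructed: the wait statement executes first, the last expression in the block becomes the property's value, and @Suppress("LeakingThis") now annotates just that constructor call. A stripped-down illustration of the idiom, using placeholder types rather than the Corda ones:

    // Placeholder types, for illustration of the run { } initialisation idiom only.
    class Config {
        fun waitForPredecessor(id: Int) { /* block until replica id - 1 is reachable */ }
    }

    class Server(val id: Int)

    class Replica(id: Int, config: Config) {
        // The wait runs first; the block's last expression initialises the property.
        private val server = run {
            config.waitForPredecessor(id)
            Server(id)
        }
    }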


@@ -2,10 +2,15 @@ package net.corda.node.services.transactions
import com.google.common.net.HostAndPort
import net.corda.core.div
+import net.corda.core.utilities.debug
+import net.corda.core.utilities.loggerFor
import java.io.FileWriter
import java.io.PrintWriter
import java.net.InetAddress
+import java.net.Socket
+import java.net.SocketException
import java.nio.file.Files
+import java.util.concurrent.TimeUnit.MILLISECONDS

/**
 * BFT SMaRt can only be configured via files in a configHome directory.
@@ -14,6 +19,7 @@ import java.nio.file.Files
 */
class BFTSMaRtConfig(private val replicaAddresses: List<HostAndPort>, debug: Boolean = false) : PathManager<BFTSMaRtConfig>(Files.createTempDirectory("bft-smart-config")) {
    companion object {
+       private val log = loggerFor<BFTSMaRtConfig>()
        internal val portIsClaimedFormat = "Port %s is claimed by another replica: %s"
    }
@@ -47,12 +53,38 @@ class BFTSMaRtConfig(private val replicaAddresses: List<HostAndPort>, debug: Boo
        }
    }
+   fun waitUntilReplicaWillNotPrintStackTrace(contextReplicaId: Int) {
+       // A replica will printStackTrace until all lower-numbered replicas are listening.
+       // But we can't probe a replica without it logging EOFException when our probe succeeds.
+       // So to keep logging to a minimum we only check the previous replica:
+       val peerId = contextReplicaId - 1
+       if (peerId < 0) return
+       // The printStackTrace we want to avoid is in replica-replica communication code:
+       val address = BFTSMaRtPort.FOR_REPLICAS.ofReplica(replicaAddresses[peerId])
+       log.debug { "Waiting for replica $peerId to start listening on: $address" }
+       while (!address.isListening()) MILLISECONDS.sleep(200)
+       log.debug { "Replica $peerId is ready for P2P." }
+   }
+
    private fun replicaPorts(replicaId: Int): List<HostAndPort> {
        val base = replicaAddresses[replicaId]
-       return (0..1).map { HostAndPort.fromParts(base.host, base.port + it) }
+       return BFTSMaRtPort.values().map { it.ofReplica(base) }
    }
}
+
+private enum class BFTSMaRtPort(private val off: Int) {
+   FOR_CLIENTS(0),
+   FOR_REPLICAS(1);
+
+   fun ofReplica(base: HostAndPort) = HostAndPort.fromParts(base.host, base.port + off)
+}
+
+private fun HostAndPort.isListening() = try {
+   Socket(host, port).use { true } // Will cause one error to be logged in the replica on success.
+} catch (e: SocketException) {
+   false
+}

fun maxFaultyReplicas(clusterSize: Int) = (clusterSize - 1) / 3
fun minCorrectReplicas(clusterSize: Int) = (2 * clusterSize + 3) / 3
fun minClusterSize(maxFaultyReplicas: Int) = maxFaultyReplicas * 3 + 1
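The three sizing functions above encode the standard BFT bound n >= 3f + 1. A quick, hypothetical sanity check of how they relate (not part of the commit, and assuming the three functions are in scope):

    // Hypothetical check: a 4-node cluster tolerates 1 faulty replica and needs 3 correct
    // ones, and the smallest cluster tolerating 1 fault is again 4; likewise 7 nodes / 2 faults.
    fun main() {
        check(maxFaultyReplicas(4) == 1)                 // (4 - 1) / 3 = 1
        check(minCorrectReplicas(4) == 3)                // (2 * 4 + 3) / 3 = 3
        check(minClusterSize(maxFaultyReplicas(4)) == 4) // 1 * 3 + 1 = 4
        check(maxFaultyReplicas(7) == 2)
        check(minCorrectReplicas(7) == 5)
        check(minClusterSize(2) == 7)
    }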