Merge pull request #844 from corda/git-merge-3f2e653f0
Merge to 3f2e653f0
@@ -1,7 +1,7 @@
# !! DO NOT MODIFY THE API FILE IN THIS DIRECTORY !!

The `api-current.txt` file contains a summary of Corda's current public APIs,
as generated by the `api-scanner` Gradle plugin. (See [here](../gradle-plugins/api-scanner/README.md) for a detailed description of this plugin.) It will be regenerated and the copy in this repository updated by the Release Manager with
as generated by the `api-scanner` Gradle plugin. (See [here](https://github.com/corda/corda-gradle-plugins/blob/master/api-scanner/README.md) for a detailed description of this plugin.) It will be regenerated and the copy in this repository updated by the Release Manager with
each new Corda release. It will not be modified otherwise except under special circumstances that will require extra approval.

Deleting or changing the existing Corda APIs listed in `api-current.txt` may
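For orientation, the `api-scanner` plugin mentioned in this README is applied per published module in the Gradle build and writes a per-module API summary that feeds `api-current.txt`. The sketch below is illustrative only; the plugin id, version and behaviour described are assumptions, and the api-scanner README linked above is the authoritative reference.

// build.gradle.kts (illustrative sketch; plugin id and version are assumptions, not taken from this PR)
plugins {
    id("net.corda.plugins.api-scanner") version "1.0.0"
}
// The plugin adds a scan task whose output lists the module's public classes and methods;
// the Release Manager regenerates api-current.txt from these summaries for each Corda release.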
.gitignore (vendored, 2 changed lines)

@@ -39,7 +39,6 @@ lib/quasar.jar
.idea/runConfigurations
.idea/dictionaries
.idea/codeStyles/
/gradle-plugins/.idea/

# Include the -parameters compiler option by default in IntelliJ required for serialization.
!.idea/compiler.xml
@@ -62,7 +61,6 @@ lib/quasar.jar
# Gradle:
# .idea/gradle.xml
# .idea/libraries
/gradle-plugins/gradle*

# Mongo Explorer plugin:
# .idea/mongoSettings.xml
.idea/compiler.xml (generated, 102 changed lines)

@@ -10,40 +10,18 @@
<module name="bank-of-corda-demo_integrationTest" target="1.8" />
<module name="bank-of-corda-demo_main" target="1.8" />
<module name="bank-of-corda-demo_test" target="1.8" />
<module name="behave-tools_main" target="1.8" />
<module name="behave-tools_test" target="1.8" />
<module name="behave_api" target="1.8" />
<module name="behave_behave" target="1.8" />
<module name="behave_main" target="1.8" />
<module name="behave_scenario" target="1.8" />
<module name="behave_smokeTest" target="1.8" />
<module name="behave_test" target="1.8" />
<module name="blobinspector_main" target="1.8" />
<module name="blobinspector_test" target="1.8" />
<module name="bootstrapper_main" target="1.8" />
<module name="bootstrapper_test" target="1.8" />
<module name="bridge_integrationTest" target="1.8" />
<module name="bridge_main" target="1.8" />
<module name="bridge_test" target="1.8" />
<module name="bridgecapsule_main" target="1.6" />
<module name="bridgecapsule_smokeTest" target="1.6" />
<module name="bridgecapsule_test" target="1.6" />
<module name="bridges_integrationTest" target="1.8" />
<module name="bridges_main" target="1.8" />
<module name="bridges_test" target="1.8" />
<module name="buildSrc_main" target="1.8" />
<module name="buildSrc_test" target="1.8" />
<module name="business-network-demo_integrationTest" target="1.8" />
<module name="business-network-demo_main" target="1.8" />
<module name="business-network-demo_test" target="1.8" />
<module name="canonicalizer_main" target="1.8" />
<module name="canonicalizer_test" target="1.8" />
<module name="capsule-crr-submission_main" target="1.8" />
<module name="capsule-crr-submission_test" target="1.8" />
<module name="capsule-hsm-cert-generator_main" target="1.8" />
<module name="capsule-hsm-cert-generator_test" target="1.8" />
<module name="capsule-hsm_main" target="1.8" />
<module name="capsule-hsm_test" target="1.8" />
<module name="client_main" target="1.8" />
<module name="client_test" target="1.8" />
<module name="confidential-identities_main" target="1.8" />
@@ -54,36 +32,34 @@
<module name="corda-core_integrationTest" target="1.8" />
<module name="corda-core_smokeTest" target="1.8" />
<module name="corda-finance_integrationTest" target="1.8" />
<module name="corda-project-tools_main" target="1.8" />
<module name="corda-project-tools_test" target="1.8" />
<module name="corda-project_main" target="1.8" />
<module name="corda-project_test" target="1.8" />
<module name="corda-webserver_integrationTest" target="1.8" />
<module name="corda-webserver_main" target="1.8" />
<module name="corda-webserver_test" target="1.8" />
<module name="cordapp-configuration_main" target="1.8" />
<module name="cordapp-configuration_test" target="1.8" />
<module name="cordapp_integrationTest" target="1.8" />
<module name="cordapp_main" target="1.8" />
<module name="cordapp_test" target="1.8" />
<module name="cordform-common_main" target="1.8" />
<module name="cordform-common_test" target="1.8" />
<module name="cordformation_main" target="1.8" />
<module name="cordformation_runnodes" target="1.8" />
<module name="cordformation_test" target="1.8" />
<module name="core_extraResource" target="1.8" />
<module name="core_integrationTest" target="1.8" />
<module name="core_main" target="1.8" />
<module name="core_smokeTest" target="1.8" />
<module name="core_smokeTestPlugins" target="1.8" />
<module name="core_test" target="1.8" />
<module name="dbmigration_main" target="1.8" />
<module name="dbmigration_test" target="1.8" />
<module name="demobench_main" target="1.8" />
<module name="demobench_test" target="1.8" />
<module name="docs_main" target="1.8" />
<module name="docs_source_example-code_integrationTest" target="1.8" />
<module name="docs_source_example-code_main" target="1.8" />
<module name="docs_source_example-code_test" target="1.8" />
<module name="docs_test" target="1.8" />
<module name="example-code_integrationTest" target="1.8" />
<module name="example-code_main" target="1.8" />
<module name="example-code_test" target="1.8" />
<module name="experimental-behave_behave" target="1.8" />
<module name="experimental-behave_main" target="1.8" />
<module name="experimental-behave_scenario" target="1.8" />
<module name="experimental-behave_smokeTest" target="1.8" />
<module name="experimental-behave_test" target="1.8" />
<module name="experimental-kryo-hook_main" target="1.8" />
<module name="experimental-kryo-hook_test" target="1.8" />
<module name="experimental_main" target="1.8" />
@@ -95,8 +71,6 @@
<module name="finance_integrationTest" target="1.8" />
<module name="finance_main" target="1.8" />
<module name="finance_test" target="1.8" />
<module name="flow-hook_main" target="1.8" />
<module name="flow-hook_test" target="1.8" />
<module name="flows_integrationTest" target="1.8" />
<module name="flows_main" target="1.8" />
<module name="flows_test" target="1.8" />
@@ -104,8 +78,13 @@
<module name="gradle-plugins-cordapp_test" target="1.8" />
<module name="graphs_main" target="1.8" />
<module name="graphs_test" target="1.8" />
<module name="intellij-plugin_main" target="1.8" />
<module name="intellij-plugin_test" target="1.8" />
<module name="irs-demo-cordapp_integrationTest" target="1.8" />
<module name="irs-demo-cordapp_main" target="1.8" />
<module name="irs-demo-cordapp_main~1" target="1.8" />
<module name="irs-demo-cordapp_test" target="1.8" />
<module name="irs-demo-cordapp_test~1" target="1.8" />
<module name="irs-demo-web_main" target="1.8" />
<module name="irs-demo-web_test" target="1.8" />
<module name="irs-demo_integrationTest" target="1.8" />
<module name="irs-demo_main" target="1.8" />
<module name="irs-demo_systemTest" target="1.8" />
@@ -117,19 +96,12 @@
<module name="jfx_integrationTest" target="1.8" />
<module name="jfx_main" target="1.8" />
<module name="jfx_test" target="1.8" />
<module name="jmeter_main" target="1.8" />
<module name="jmeter_test" target="1.8" />
<module name="kryo-hook_main" target="1.8" />
<module name="kryo-hook_test" target="1.8" />
<module name="loadtest_main" target="1.8" />
<module name="loadtest_test" target="1.8" />
<module name="mock_main" target="1.8" />
<module name="mock_test" target="1.8" />
<module name="network-management-capsule_main" target="1.8" />
<module name="network-management-capsule_test" target="1.8" />
<module name="network-management_integrationTest" target="1.8" />
<module name="network-management_main" target="1.8" />
<module name="network-management_test" target="1.8" />
<module name="network-visualiser_main" target="1.8" />
<module name="network-visualiser_test" target="1.8" />
<module name="node-api_main" target="1.8" />
@@ -145,62 +117,45 @@
<module name="node_test" target="1.8" />
<module name="notary-demo_main" target="1.8" />
<module name="notary-demo_test" target="1.8" />
<module name="notaryhealthcheck_main" target="1.8" />
<module name="notaryhealthcheck_test" target="1.8" />
<module name="perftestcordapp_integrationTest" target="1.8" />
<module name="perftestcordapp_main" target="1.8" />
<module name="perftestcordapp_test" target="1.8" />
<module name="qa-behave_main" target="1.8" />
<module name="qa-behave_test" target="1.8" />
<module name="qa_main" target="1.8" />
<module name="qa_test" target="1.8" />
<module name="publish-utils_main" target="1.8" />
<module name="publish-utils_test" target="1.8" />
<module name="quasar-hook_main" target="1.8" />
<module name="quasar-hook_test" target="1.8" />
<module name="quasar-utils_main" target="1.8" />
<module name="quasar-utils_test" target="1.8" />
<module name="registration-tool_integrationTest" target="1.8" />
<module name="registration-tool_main" target="1.8" />
<module name="registration-tool_test" target="1.8" />
<module name="rpc-proxy_main" target="1.8" />
<module name="rpc-proxy_rpcProxy" target="1.8" />
<module name="rpc-proxy_smokeTest" target="1.8" />
<module name="rpc-proxy_test" target="1.8" />
<module name="rpc_integrationTest" target="1.8" />
<module name="rpc_main" target="1.8" />
<module name="rpc_smokeTest" target="1.8" />
<module name="rpc_test" target="1.8" />
<module name="samples-business-network-demo_main" target="1.8" />
<module name="samples-business-network-demo_test" target="1.8" />
<module name="samples_main" target="1.8" />
<module name="samples_test" target="1.8" />
<module name="sandbox_main" target="1.8" />
<module name="sandbox_test" target="1.8" />
<module name="sgx-hsm-tool_main" target="1.8" />
<module name="sgx-hsm-tool_test" target="1.8" />
<module name="sgx-jvm_hsm-tool_main" target="1.8" />
<module name="sgx-jvm_hsm-tool_test" target="1.8" />
<module name="shell_integrationTest" target="1.8" />
<module name="shell_main" target="1.8" />
<module name="shell_test" target="1.8" />
<module name="simm-valuation-demo_integrationTest" target="1.8" />
<module name="simm-valuation-demo_main" target="1.8" />
<module name="simm-valuation-demo_scenario" target="1.8" />
<module name="simm-valuation-demo_scenarioTest" target="1.8" />
<module name="simm-valuation-demo_test" target="1.8" />
<module name="smoke-test-utils_main" target="1.8" />
<module name="smoke-test-utils_test" target="1.8" />
<module name="source-example-code_integrationTest" target="1.8" />
<module name="source-example-code_main" target="1.8" />
<module name="source-example-code_test" target="1.8" />
<module name="test-common_main" target="1.8" />
<module name="test-common_test" target="1.8" />
<module name="test-utils_integrationTest" target="1.8" />
<module name="test-utils_main" target="1.8" />
<module name="test-utils_test" target="1.8" />
<module name="testing-node-driver_integrationTest" target="1.8" />
<module name="testing-node-driver_main" target="1.8" />
<module name="testing-node-driver_test" target="1.8" />
<module name="testing-smoke-test-utils_main" target="1.8" />
<module name="testing-smoke-test-utils_test" target="1.8" />
<module name="testing-test-common_main" target="1.8" />
<module name="testing-test-common_test" target="1.8" />
<module name="testing-test-utils_main" target="1.8" />
<module name="testing-test-utils_test" target="1.8" />
<module name="testing_main" target="1.8" />
<module name="testing_test" target="1.8" />
<module name="tools_main" target="1.8" />
<module name="tools_test" target="1.8" />
<module name="trader-demo_integrationTest" target="1.8" />
@@ -209,13 +164,12 @@
<module name="verifier_integrationTest" target="1.8" />
<module name="verifier_main" target="1.8" />
<module name="verifier_test" target="1.8" />
<module name="verify-enclave_integrationTest" target="1.8" />
<module name="verify-enclave_main" target="1.8" />
<module name="verify-enclave_test" target="1.8" />
<module name="web_main" target="1.8" />
<module name="web_test" target="1.8" />
<module name="webcapsule_main" target="1.6" />
<module name="webcapsule_test" target="1.6" />
<module name="webserver-webcapsule_main" target="1.8" />
<module name="webserver-webcapsule_test" target="1.8" />
<module name="webserver_integrationTest" target="1.8" />
<module name="webserver_main" target="1.8" />
<module name="webserver_test" target="1.8" />
@@ -10,22 +10,21 @@

package net.corda.client.jackson

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.annotation.*
import com.fasterxml.jackson.core.*
import com.fasterxml.jackson.databind.*
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.annotation.JsonSerialize
import com.fasterxml.jackson.databind.deser.std.NumberDeserializers
import com.fasterxml.jackson.databind.deser.std.StdDeserializer
import com.fasterxml.jackson.databind.module.SimpleModule
import com.fasterxml.jackson.databind.node.ObjectNode
import com.fasterxml.jackson.databind.ser.std.StdSerializer
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.module.kotlin.KotlinModule
import com.fasterxml.jackson.module.kotlin.convertValue
import net.corda.client.jackson.internal.addSerAndDeser
import net.corda.client.jackson.internal.jsonObject
import net.corda.client.jackson.internal.readValueAs
import net.corda.core.CordaInternal
import net.corda.core.CordaOID
import net.corda.core.DoNotImplement
import net.corda.core.contracts.Amount
import net.corda.core.contracts.ContractState
@@ -33,24 +32,30 @@ import net.corda.core.contracts.StateRef
import net.corda.core.crypto.*
import net.corda.core.crypto.TransactionSignature
import net.corda.core.identity.*
import net.corda.core.internal.CertRole
import net.corda.core.internal.DigitalSignatureWithCert
import net.corda.core.internal.VisibleForTesting
import net.corda.core.internal.uncheckedCast
import net.corda.core.messaging.CordaRPCOps
import net.corda.core.node.NodeInfo
import net.corda.core.node.services.IdentityService
import net.corda.core.serialization.SerializedBytes
import net.corda.core.serialization.deserialize
import net.corda.core.serialization.serialize
import net.corda.core.transactions.CoreTransaction
import net.corda.core.transactions.NotaryChangeWireTransaction
import net.corda.core.transactions.SignedTransaction
import net.corda.core.transactions.WireTransaction
import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.OpaqueBytes
import net.corda.core.utilities.parsePublicKeyBase58
import net.corda.core.utilities.toBase58String
import net.corda.core.utilities.*
import org.bouncycastle.asn1.x509.KeyPurposeId
import java.lang.reflect.Modifier
import java.math.BigDecimal
import java.security.PublicKey
import java.security.cert.CertPath
import java.security.cert.CertificateFactory
import java.security.cert.X509Certificate
import java.util.*
import javax.security.auth.x500.X500Principal

/**
* Utilities and serialisers for working with JSON representations of basic types. This adds Jackson support for
@@ -100,25 +105,26 @@ object JacksonSupport {

val cordaModule: Module by lazy {
SimpleModule("core").apply {
addSerAndDeser(AnonymousPartySerializer, AnonymousPartyDeserializer)
addSerAndDeser(PartySerializer, PartyDeserializer)
addDeserializer(AbstractParty::class.java, PartyDeserializer)
addSerAndDeser<BigDecimal>(toStringSerializer, NumberDeserializers.BigDecimalDeserializer())
addSerAndDeser<SecureHash.SHA256>(toStringSerializer, SecureHashDeserializer())
addSerAndDeser(toStringSerializer, AmountDeserializer)
addSerAndDeser(OpaqueBytesSerializer, OpaqueBytesDeserializer)
addSerAndDeser(toStringSerializer, CordaX500NameDeserializer)
addSerAndDeser(PublicKeySerializer, PublicKeyDeserializer)
addDeserializer(CompositeKey::class.java, CompositeKeyDeseriaizer)
addSerAndDeser(toStringSerializer, NetworkHostAndPortDeserializer)
// TODO Add deserialization which follows the same lookup logic as Party
addSerializer(PartyAndCertificate::class.java, PartyAndCertificateSerializer)
addDeserializer(NodeInfo::class.java, NodeInfoDeserializer)

listOf(TransactionSignatureSerde, SignedTransactionSerde).forEach { serde -> serde.applyTo(this) }

// Using mixins to fine-tune the default serialised output
setMixInAnnotation(BigDecimal::class.java, BigDecimalMixin::class.java)
setMixInAnnotation(X500Principal::class.java, X500PrincipalMixin::class.java)
setMixInAnnotation(X509Certificate::class.java, X509CertificateMixin::class.java)
setMixInAnnotation(PartyAndCertificate::class.java, PartyAndCertificateSerializerMixin::class.java)
setMixInAnnotation(NetworkHostAndPort::class.java, NetworkHostAndPortMixin::class.java)
setMixInAnnotation(CordaX500Name::class.java, CordaX500NameMixin::class.java)
setMixInAnnotation(Amount::class.java, AmountMixin::class.java)
setMixInAnnotation(AbstractParty::class.java, AbstractPartyMixin::class.java)
setMixInAnnotation(AnonymousParty::class.java, AnonymousPartyMixin::class.java)
setMixInAnnotation(Party::class.java, PartyMixin::class.java)
setMixInAnnotation(PublicKey::class.java, PublicKeyMixin::class.java)
setMixInAnnotation(ByteSequence::class.java, ByteSequenceMixin::class.java)
setMixInAnnotation(SecureHash.SHA256::class.java, SecureHashSHA256Mixin::class.java)
setMixInAnnotation(SerializedBytes::class.java, SerializedBytesMixin::class.java)
setMixInAnnotation(DigitalSignature.WithKey::class.java, ByteSequenceWithPropertiesMixin::class.java)
setMixInAnnotation(DigitalSignatureWithCert::class.java, ByteSequenceWithPropertiesMixin::class.java)
setMixInAnnotation(TransactionSignature::class.java, ByteSequenceWithPropertiesMixin::class.java)
setMixInAnnotation(SignedTransaction::class.java, SignedTransactionMixin2::class.java)
setMixInAnnotation(WireTransaction::class.java, WireTransactionMixin::class.java)
setMixInAnnotation(CertPath::class.java, CertPathMixin::class.java)
setMixInAnnotation(NodeInfo::class.java, NodeInfoMixin::class.java)
}
}
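The cordaModule block above registers custom serialisers and, via setMixInAnnotation, Jackson mixins for types Corda does not own. As a self-contained illustration of the mixin pattern only (the HostAndPort type and field names below are hypothetical, not Corda classes), a mixin carries the annotations that Jackson then applies to the target class:

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.annotation.JsonSerialize
import com.fasterxml.jackson.databind.module.SimpleModule
import com.fasterxml.jackson.databind.ser.std.ToStringSerializer

// Hypothetical third-party class that we cannot annotate directly.
data class HostAndPort(val host: String, val port: Int) {
    override fun toString() = "$host:$port"
}

// The mixin holds the annotations; it is never instantiated.
@JsonSerialize(using = ToStringSerializer::class)
private interface HostAndPortMixin

fun main() {
    val mapper = ObjectMapper().registerModule(SimpleModule("demo").apply {
        setMixInAnnotation(HostAndPort::class.java, HostAndPortMixin::class.java)
    })
    // Serialises as the JSON string "localhost:10002" rather than a {host, port} object.
    println(mapper.writeValueAsString(HostAndPort("localhost", 10002)))
}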
@@ -181,7 +187,13 @@ object JacksonSupport {
}
}

private val toStringSerializer = com.fasterxml.jackson.databind.ser.std.ToStringSerializer.instance
@JacksonAnnotationsInside
@JsonSerialize(using = com.fasterxml.jackson.databind.ser.std.ToStringSerializer::class)
private annotation class ToStringSerialize

@ToStringSerialize
@JsonDeserialize(using = NumberDeserializers.BigDecimalDeserializer::class)
private interface BigDecimalMixin

private object DateSerializer : JsonSerializer<Date>() {
override fun serialize(value: Date, gen: JsonGenerator, serializers: SerializerProvider) {
@@ -189,20 +201,21 @@ object JacksonSupport {
}
}

private object NetworkHostAndPortDeserializer : JsonDeserializer<NetworkHostAndPort>() {
@ToStringSerialize
@JsonDeserialize(using = NetworkHostAndPortDeserializer::class)
private interface NetworkHostAndPortMixin

private class NetworkHostAndPortDeserializer : JsonDeserializer<NetworkHostAndPort>() {
override fun deserialize(parser: JsonParser, ctxt: DeserializationContext): NetworkHostAndPort {
return NetworkHostAndPort.parse(parser.text)
}
}

private object CompositeKeyDeseriaizer : JsonDeserializer<CompositeKey>() {
override fun deserialize(parser: JsonParser, ctxt: DeserializationContext): CompositeKey {
val publicKey = parser.readValueAs<PublicKey>()
return publicKey as? CompositeKey ?: throw JsonParseException(parser, "Not a CompositeKey: $publicKey")
}
}
@JsonSerialize(using = PartyAndCertificateSerializer::class)
// TODO Add deserialization which follows the same lookup logic as Party
private interface PartyAndCertificateSerializerMixin

private object PartyAndCertificateSerializer : JsonSerializer<PartyAndCertificate>() {
private class PartyAndCertificateSerializer : JsonSerializer<PartyAndCertificate>() {
override fun serialize(value: PartyAndCertificate, gen: JsonGenerator, serializers: SerializerProvider) {
gen.jsonObject {
writeObjectField("name", value.name)
@@ -212,100 +225,146 @@ object JacksonSupport {
}
}

@Suppress("unused")
private interface NodeInfoMixin {
@get:JsonIgnore val legalIdentities: Any // This is already covered by legalIdentitiesAndCerts
@JsonSerialize(using = SignedTransactionSerializer::class)
@JsonDeserialize(using = SignedTransactionDeserializer::class)
private interface SignedTransactionMixin2

private class SignedTransactionSerializer : JsonSerializer<SignedTransaction>() {
override fun serialize(value: SignedTransaction, gen: JsonGenerator, serializers: SerializerProvider) {
gen.writeObject(SignedTransactionWrapper(value.txBits.bytes, value.sigs))
}
}

private interface JsonSerde<TYPE> {
val type: Class<TYPE>
val serializer: JsonSerializer<TYPE>
val deserializer: JsonDeserializer<TYPE>
private class SignedTransactionDeserializer : JsonDeserializer<SignedTransaction>() {
override fun deserialize(parser: JsonParser, ctxt: DeserializationContext): SignedTransaction {
val wrapper = parser.readValueAs<SignedTransactionWrapper>()
return SignedTransaction(SerializedBytes(wrapper.txBits), wrapper.signatures)
}
}

fun applyTo(module: SimpleModule) {
with(module) {
addSerializer(type, serializer)
addDeserializer(type, deserializer)
private class SignedTransactionWrapper(val txBits: ByteArray, val signatures: List<TransactionSignature>)

@JsonSerialize(using = SerializedBytesSerializer::class)
@JsonDeserialize(using = SerializedBytesDeserializer::class)
private class SerializedBytesMixin

private class SerializedBytesSerializer : JsonSerializer<SerializedBytes<*>>() {
override fun serialize(value: SerializedBytes<*>, gen: JsonGenerator, serializers: SerializerProvider) {
val deserialized = value.deserialize<Any>()
gen.jsonObject {
writeStringField("class", deserialized.javaClass.name)
writeObjectField("deserialized", deserialized)
}
}
}
private inline fun <reified RESULT> JsonNode.get(fieldName: String, condition: (JsonNode) -> Boolean, mapper: ObjectMapper, parser: JsonParser): RESULT {
if (get(fieldName)?.let(condition) != true) {
throw JsonParseException(parser, "Missing required object field \"$fieldName\".")
}
return mapper.treeToValue(get(fieldName), RESULT::class.java)
}

private object TransactionSignatureSerde : JsonSerde<TransactionSignature> {
override val type: Class<TransactionSignature> = TransactionSignature::class.java

override val serializer = object : StdSerializer<TransactionSignature>(type) {
override fun serialize(value: TransactionSignature, gen: JsonGenerator, serializers: SerializerProvider) {
gen.jsonObject {
writeObjectField("by", value.by)
writeObjectField("signatureMetadata", value.signatureMetadata)
writeObjectField("bytes", value.bytes)
writeObjectField("partialMerkleTree", value.partialMerkleTree)
}
}
}

override val deserializer = object : StdDeserializer<TransactionSignature>(type) {
override fun deserialize(parser: JsonParser, context: DeserializationContext): TransactionSignature {
private class SerializedBytesDeserializer : JsonDeserializer<SerializedBytes<*>>() {
override fun deserialize(parser: JsonParser, context: DeserializationContext): SerializedBytes<Any> {
return if (parser.currentToken == JsonToken.START_OBJECT) {
val mapper = parser.codec as ObjectMapper
val json = mapper.readTree<JsonNode>(parser)
val by = mapper.convertValue<PublicKey>(json["by"])
val signatureMetadata = json.get<SignatureMetadata>("signatureMetadata", JsonNode::isObject, mapper, parser)
val bytes = json.get<ByteArray>("bytes", JsonNode::isObject, mapper, parser)
val partialMerkleTree = json.get<PartialMerkleTree>("partialMerkleTree", JsonNode::isObject, mapper, parser)

return TransactionSignature(bytes, by, signatureMetadata, partialMerkleTree)
val json = parser.readValueAsTree<ObjectNode>()
val clazz = context.findClass(json["class"].textValue())
val pojo = mapper.convertValue(json["deserialized"], clazz)
pojo.serialize()
} else {
SerializedBytes(parser.binaryValue)
}
}
}

private object SignedTransactionSerde : JsonSerde<SignedTransaction> {
override val type: Class<SignedTransaction> = SignedTransaction::class.java
@ToStringSerialize
private interface X500PrincipalMixin

override val serializer = object : StdSerializer<SignedTransaction>(type) {
override fun serialize(value: SignedTransaction, gen: JsonGenerator, serializers: SerializerProvider) {
gen.jsonObject {
writeObjectField("txBits", value.txBits.bytes)
writeObjectField("signatures", value.sigs)
@JsonSerialize(using = X509CertificateSerializer::class)
@JsonDeserialize(using = X509CertificateDeserializer::class)
private interface X509CertificateMixin

private object X509CertificateSerializer : JsonSerializer<X509Certificate>() {
val keyUsages = arrayOf(
"digitalSignature",
"nonRepudiation",
"keyEncipherment",
"dataEncipherment",
"keyAgreement",
"keyCertSign",
"cRLSign",
"encipherOnly",
"decipherOnly"
)

val keyPurposeIds = KeyPurposeId::class.java
.fields
.filter { Modifier.isStatic(it.modifiers) && it.type == KeyPurposeId::class.java }
.associateBy({ (it.get(null) as KeyPurposeId).id }, { it.name })

val knownExtensions = setOf("2.5.29.15", "2.5.29.37", "2.5.29.19", "2.5.29.17", "2.5.29.18", CordaOID.X509_EXTENSION_CORDA_ROLE)

override fun serialize(value: X509Certificate, gen: JsonGenerator, serializers: SerializerProvider) {
gen.jsonObject {
writeNumberField("version", value.version)
writeObjectField("serialNumber", value.serialNumber)
writeObjectField("subject", value.subjectX500Principal)
writeObjectField("publicKey", value.publicKey)
writeObjectField("issuer", value.issuerX500Principal)
writeObjectField("notBefore", value.notBefore)
writeObjectField("notAfter", value.notAfter)
writeObjectField("issuerUniqueID", value.issuerUniqueID)
writeObjectField("subjectUniqueID", value.subjectUniqueID)
writeObjectField("keyUsage", value.keyUsage?.asList()?.mapIndexedNotNull { i, flag -> if (flag) keyUsages[i] else null })
writeObjectField("extendedKeyUsage", value.extendedKeyUsage.map { keyPurposeIds.getOrDefault(it, it) })
jsonObject("basicConstraints") {
writeBooleanField("isCA", value.basicConstraints != -1)
writeObjectField("pathLength", value.basicConstraints.let { if (it != Int.MAX_VALUE) it else null })
}
writeObjectField("subjectAlternativeNames", value.subjectAlternativeNames)
writeObjectField("issuerAlternativeNames", value.issuerAlternativeNames)
writeObjectField("cordaCertRole", CertRole.extract(value))
writeObjectField("otherCriticalExtensions", value.criticalExtensionOIDs - knownExtensions)
writeObjectField("otherNonCriticalExtensions", value.nonCriticalExtensionOIDs - knownExtensions)
writeBinaryField("encoded", value.encoded)
}
}

override val deserializer = object : StdDeserializer<SignedTransaction>(type) {
override fun deserialize(parser: JsonParser, context: DeserializationContext): SignedTransaction {
val mapper = parser.codec as ObjectMapper
val json = mapper.readTree<JsonNode>(parser)

val txBits = json.get<ByteArray>("txBits", JsonNode::isTextual, mapper, parser)
val signatures = json.get<TransactionSignatures>("signatures", JsonNode::isArray, mapper, parser)

return SignedTransaction(SerializedBytes(txBits), signatures)
}
}

private class TransactionSignatures : ArrayList<TransactionSignature>()
}


//
// The following should not have been made public and are thus deprecated with warnings.
//

@Deprecated("No longer used as jackson already has a toString serializer",
replaceWith = ReplaceWith("com.fasterxml.jackson.databind.ser.std.ToStringSerializer.instance"))
object ToStringSerializer : JsonSerializer<Any>() {
override fun serialize(obj: Any, generator: JsonGenerator, provider: SerializerProvider) {
generator.writeString(obj.toString())
private class X509CertificateDeserializer : JsonDeserializer<X509Certificate>() {
private val certFactory = CertificateFactory.getInstance("X.509")
override fun deserialize(parser: JsonParser, ctxt: DeserializationContext): X509Certificate {
val encoded = parser.readValueAsTree<ObjectNode>()["encoded"]
return certFactory.generateCertificate(encoded.binaryValue().inputStream()) as X509Certificate
}
}

@JsonSerialize(using = CertPathSerializer::class)
@JsonDeserialize(using = CertPathDeserializer::class)
private interface CertPathMixin

private class CertPathSerializer : JsonSerializer<CertPath>() {
override fun serialize(value: CertPath, gen: JsonGenerator, serializers: SerializerProvider) {
gen.writeObject(CertPathWrapper(value.type, uncheckedCast(value.certificates)))
}
}

private class CertPathDeserializer : JsonDeserializer<CertPath>() {
private val certFactory = CertificateFactory.getInstance("X.509")
override fun deserialize(parser: JsonParser, ctxt: DeserializationContext): CertPath {
val wrapper = parser.readValueAs<CertPathWrapper>()
return certFactory.generateCertPath(wrapper.certificates)
}
}

private data class CertPathWrapper(val type: String, val certificates: List<X509Certificate>) {
init {
require(type == "X.509") { "Only X.509 cert paths are supported" }
}
}

@JsonDeserialize(using = PartyDeserializer::class)
private interface AbstractPartyMixin

@JsonSerialize(using = AnonymousPartySerializer::class)
@JsonDeserialize(using = AnonymousPartyDeserializer::class)
private interface AnonymousPartyMixin

@Deprecated("This is an internal class, do not use")
object AnonymousPartySerializer : JsonSerializer<AnonymousParty>() {
override fun serialize(value: AnonymousParty, generator: JsonGenerator, provider: SerializerProvider) {
@@ -320,6 +379,9 @@ object JacksonSupport {
}
}

@JsonSerialize(using = PartySerializer::class)
private interface PartyMixin

@Deprecated("This is an internal class, do not use")
object PartySerializer : JsonSerializer<Party>() {
override fun serialize(value: Party, generator: JsonGenerator, provider: SerializerProvider) {
@@ -354,13 +416,9 @@ object JacksonSupport {
}
}

@Deprecated("This is an internal class, do not use")
// This is no longer used
object CordaX500NameSerializer : JsonSerializer<CordaX500Name>() {
override fun serialize(obj: CordaX500Name, generator: JsonGenerator, provider: SerializerProvider) {
generator.writeString(obj.toString())
}
}
@ToStringSerialize
@JsonDeserialize(using = CordaX500NameDeserializer::class)
private interface CordaX500NameMixin

@Deprecated("This is an internal class, do not use")
object CordaX500NameDeserializer : JsonDeserializer<CordaX500Name>() {
@@ -373,13 +431,9 @@ object JacksonSupport {
}
}

@Deprecated("This is an internal class, do not use")
// This is no longer used
object NodeInfoSerializer : JsonSerializer<NodeInfo>() {
override fun serialize(value: NodeInfo, gen: JsonGenerator, serializers: SerializerProvider) {
gen.writeString(Base58.encode(value.serialize().bytes))
}
}
@JsonIgnoreProperties("legalIdentities") // This is already covered by legalIdentitiesAndCerts
@JsonDeserialize(using = NodeInfoDeserializer::class)
private interface NodeInfoMixin

@Deprecated("This is an internal class, do not use")
object NodeInfoDeserializer : JsonDeserializer<NodeInfo>() {
@@ -390,17 +444,10 @@ object JacksonSupport {
}
}

@Deprecated("This is an internal class, do not use")
// This is no longer used
object SecureHashSerializer : JsonSerializer<SecureHash>() {
override fun serialize(obj: SecureHash, generator: JsonGenerator, provider: SerializerProvider) {
generator.writeString(obj.toString())
}
}
@ToStringSerialize
@JsonDeserialize(using = SecureHashDeserializer::class)
private interface SecureHashSHA256Mixin

/**
* Implemented as a class so that we can instantiate for T.
*/
@Deprecated("This is an internal class, do not use")
class SecureHashDeserializer<T : SecureHash> : JsonDeserializer<T>() {
override fun deserialize(parser: JsonParser, context: DeserializationContext): T {
@@ -412,6 +459,10 @@ object JacksonSupport {
}
}

@JsonSerialize(using = PublicKeySerializer::class)
@JsonDeserialize(using = PublicKeyDeserializer::class)
private interface PublicKeyMixin

@Deprecated("This is an internal class, do not use")
object PublicKeySerializer : JsonSerializer<PublicKey>() {
override fun serialize(value: PublicKey, generator: JsonGenerator, provider: SerializerProvider) {
@@ -430,13 +481,9 @@ object JacksonSupport {
}
}

@Deprecated("This is an internal class, do not use")
// This is no longer used
object AmountSerializer : JsonSerializer<Amount<*>>() {
override fun serialize(value: Amount<*>, gen: JsonGenerator, serializers: SerializerProvider) {
gen.writeString(value.toString())
}
}
@ToStringSerialize
@JsonDeserialize(using = AmountDeserializer::class)
private interface AmountMixin

@Deprecated("This is an internal class, do not use")
object AmountDeserializer : JsonDeserializer<Amount<*>>() {
@@ -444,20 +491,30 @@ object JacksonSupport {
return if (parser.currentToken == JsonToken.VALUE_STRING) {
Amount.parseCurrency(parser.text)
} else {
try {
val tree = parser.readValueAsTree<ObjectNode>()
val quantity = tree["quantity"].apply { require(canConvertToLong()) }
val token = tree["token"]
// Attempt parsing as a currency token. TODO: This needs thought about how to extend to other token types.
val currency = (parser.codec as ObjectMapper).convertValue<Currency>(token)
Amount(quantity.longValue(), currency)
} catch (e: Exception) {
throw JsonParseException(parser, "Invalid amount", e)
}
val wrapper = parser.readValueAs<CurrencyAmountWrapper>()
Amount(wrapper.quantity, wrapper.token)
}
}
}

private data class CurrencyAmountWrapper(val quantity: Long, val token: Currency)

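The AmountDeserializer above accepts both the plain string form (parsed with Amount.parseCurrency) and the older {quantity, token} object form bound through CurrencyAmountWrapper. A rough usage sketch follows; it assumes a mapper that has cordaModule registered, for example one obtained from JacksonSupport.createNonRpcMapper(), which is not shown in this diff.

import net.corda.client.jackson.JacksonSupport
import net.corda.core.contracts.Amount

fun amountExamples() {
    // Assumption: createNonRpcMapper() returns a mapper with cordaModule registered,
    // so AmountMixin/AmountDeserializer apply.
    val mapper = JacksonSupport.createNonRpcMapper()
    // String form, parsed with Amount.parseCurrency(...)
    val fromString = mapper.readValue("\"25000000.00 USD\"", Amount::class.java)
    // Legacy object form, bound through CurrencyAmountWrapper
    val fromObject = mapper.readValue("""{"quantity": 2500000000, "token": "USD"}""", Amount::class.java)
    check(fromString == fromObject)
}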
@JsonDeserialize(using = OpaqueBytesDeserializer::class)
private interface ByteSequenceMixin {
@Suppress("unused")
@JsonValue
fun copyBytes(): ByteArray
}

@JsonIgnoreProperties("offset", "size")
@JsonSerialize
@JsonDeserialize
private interface ByteSequenceWithPropertiesMixin {
@Suppress("unused")
@JsonValue(false)
fun copyBytes(): ByteArray
}

@Deprecated("This is an internal class, do not use")
object OpaqueBytesDeserializer : JsonDeserializer<OpaqueBytes>() {
override fun deserialize(parser: JsonParser, ctxt: DeserializationContext): OpaqueBytes {
@@ -465,6 +522,47 @@ object JacksonSupport {
}
}

//
// Everything below this point is no longer used but can't be deleted as they leaked into the public API
//

@Deprecated("No longer used as jackson already has a toString serializer",
replaceWith = ReplaceWith("com.fasterxml.jackson.databind.ser.std.ToStringSerializer.instance"))
object ToStringSerializer : JsonSerializer<Any>() {
override fun serialize(obj: Any, generator: JsonGenerator, provider: SerializerProvider) {
generator.writeString(obj.toString())
}
}

@Deprecated("This is an internal class, do not use")
object CordaX500NameSerializer : JsonSerializer<CordaX500Name>() {
override fun serialize(obj: CordaX500Name, generator: JsonGenerator, provider: SerializerProvider) {
generator.writeString(obj.toString())
}
}

@Deprecated("This is an internal class, do not use")
object NodeInfoSerializer : JsonSerializer<NodeInfo>() {
override fun serialize(value: NodeInfo, gen: JsonGenerator, serializers: SerializerProvider) {
gen.writeString(Base58.encode(value.serialize().bytes))
}
}

@Deprecated("This is an internal class, do not use")
object SecureHashSerializer : JsonSerializer<SecureHash>() {
override fun serialize(obj: SecureHash, generator: JsonGenerator, provider: SerializerProvider) {
generator.writeString(obj.toString())
}
}

@Deprecated("This is an internal class, do not use")
object AmountSerializer : JsonSerializer<Amount<*>>() {
override fun serialize(value: Amount<*>, gen: JsonGenerator, serializers: SerializerProvider) {
gen.writeString(value.toString())
}
}

@Deprecated("This is an internal class, do not use")
object OpaqueBytesSerializer : JsonSerializer<OpaqueBytes>() {
override fun serialize(value: OpaqueBytes, gen: JsonGenerator, serializers: SerializerProvider) {
@@ -477,7 +575,7 @@ object JacksonSupport {
abstract class SignedTransactionMixin {
@JsonIgnore abstract fun getTxBits(): SerializedBytes<CoreTransaction>
@JsonProperty("signatures") protected abstract fun getSigs(): List<TransactionSignature>
@JsonProperty protected abstract fun getTransaction(): CoreTransaction // TODO It seems this should be coreTransaction
@JsonProperty protected abstract fun getTransaction(): CoreTransaction
@JsonIgnore abstract fun getTx(): WireTransaction
@JsonIgnore abstract fun getNotaryChangeTx(): NotaryChangeWireTransaction
@JsonIgnore abstract fun getInputs(): List<StateRef>
@@ -3,8 +3,11 @@ package net.corda.client.jackson.internal
import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.JsonDeserializer
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.JsonSerializer
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.module.SimpleModule
import com.fasterxml.jackson.module.kotlin.convertValue

inline fun <reified T : Any> SimpleModule.addSerAndDeser(serializer: JsonSerializer<in T>, deserializer: JsonDeserializer<T>) {
addSerializer(T::class.java, serializer)
@@ -19,3 +22,5 @@ inline fun JsonGenerator.jsonObject(fieldName: String? = null, gen: JsonGenerato
}

inline fun <reified T> JsonParser.readValueAs(): T = readValueAs(T::class.java)

inline fun <reified T : Any> JsonNode.valueAs(mapper: ObjectMapper): T = mapper.convertValue(this)
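For readers unfamiliar with these reified helpers, here is a minimal usage sketch (illustrative only: the extension function and field names below are invented, and same-module access to the internal package is assumed, mirroring how JacksonSupport uses the helper above).

import com.fasterxml.jackson.core.JsonGenerator
import net.corda.client.jackson.internal.jsonObject

// Hypothetical generator extension: emits {"address":{"host":...,"port":...}} using the
// jsonObject helper, which handles the start/end-object bookkeeping, optionally under a field name.
fun JsonGenerator.writeAddress(host: String, port: Int) {
    jsonObject("address") {
        writeStringField("host", host)
        writeNumberField("port", port)
    }
}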
@@ -10,14 +10,16 @@

package net.corda.client.jackson

import com.fasterxml.jackson.databind.SerializationFeature
import com.fasterxml.jackson.databind.node.ArrayNode
import com.fasterxml.jackson.core.JsonFactory
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.BinaryNode
import com.fasterxml.jackson.databind.node.ObjectNode
import com.fasterxml.jackson.databind.node.TextNode
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.kotlin.convertValue
import com.nhaarman.mockito_kotlin.doReturn
import com.nhaarman.mockito_kotlin.whenever
import net.corda.client.jackson.internal.valueAs
import net.corda.core.contracts.Amount
import net.corda.core.cordapp.CordappProvider
import net.corda.core.crypto.*
@@ -26,14 +28,16 @@ import net.corda.core.identity.AbstractParty
import net.corda.core.identity.AnonymousParty
import net.corda.core.identity.CordaX500Name
import net.corda.core.identity.Party
import net.corda.core.internal.DigitalSignatureWithCert
import net.corda.core.node.NodeInfo
import net.corda.core.node.ServiceHub
import net.corda.core.serialization.CordaSerializable
import net.corda.core.serialization.SerializedBytes
import net.corda.core.serialization.serialize
import net.corda.core.transactions.SignedTransaction
import net.corda.core.utilities.NetworkHostAndPort
import net.corda.core.utilities.OpaqueBytes
import net.corda.core.utilities.toBase58String
import net.corda.core.utilities.toBase64
import net.corda.core.utilities.*
import net.corda.finance.USD
import net.corda.nodeapi.internal.crypto.x509Certificates
import net.corda.testing.common.internal.testNetworkParameters
import net.corda.testing.contracts.DummyContract
import net.corda.testing.core.*
@@ -44,19 +48,29 @@ import org.assertj.core.api.Assertions.assertThatThrownBy
import org.junit.Before
import org.junit.Rule
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.Parameterized
import org.junit.runners.Parameterized.Parameters
import java.math.BigInteger
import java.security.PublicKey
import java.security.cert.CertPath
import java.security.cert.X509Certificate
import java.util.*
import javax.security.auth.x500.X500Principal
import kotlin.collections.ArrayList
import kotlin.test.assertEquals

class JacksonSupportTest {
@RunWith(Parameterized::class)
class JacksonSupportTest(@Suppress("unused") private val name: String, factory: JsonFactory) {
private companion object {
val SEED: BigInteger = BigInteger.valueOf(20170922L)
val ALICE_PUBKEY = TestIdentity(ALICE_NAME, 70).publicKey
val BOB_PUBKEY = TestIdentity(BOB_NAME, 70).publicKey
val DUMMY_NOTARY = TestIdentity(DUMMY_NOTARY_NAME, 20).party
val MINI_CORP = TestIdentity(CordaX500Name("MiniCorp", "London", "GB"))

@Parameters(name = "{0}")
@JvmStatic
fun factories() = arrayOf(arrayOf("JSON", JsonFactory()), arrayOf("YAML", YAMLFactory()))
}

@Rule
@@ -64,7 +78,7 @@ class JacksonSupportTest {
val testSerialization = SerializationEnvironmentRule()

private val partyObjectMapper = TestPartyObjectMapper()
private val mapper = JacksonSupport.createPartyObjectMapper(partyObjectMapper)
private val mapper = JacksonSupport.createPartyObjectMapper(partyObjectMapper, factory)

private lateinit var services: ServiceHub
private lateinit var cordappProvider: CordappProvider
@@ -76,44 +90,29 @@ class JacksonSupportTest {
doReturn(cordappProvider).whenever(services).cordappProvider
}

private class Dummy(val notional: Amount<Currency>)

@Test
fun `read Amount`() {
val oldJson = """
{
"notional": {
"quantity": 2500000000,
"token": "USD"
}
}
"""
val newJson = """ { "notional" : "$25000000" } """

assertEquals(Amount(2500000000L, USD), mapper.readValue(newJson, Dummy::class.java).notional)
assertEquals(Amount(2500000000L, USD), mapper.readValue(oldJson, Dummy::class.java).notional)
fun `Amount(Currency) serialization`() {
assertThat(mapper.valueToTree<TextNode>(Amount.parseCurrency("£25000000")).textValue()).isEqualTo("25000000.00 GBP")
assertThat(mapper.valueToTree<TextNode>(Amount.parseCurrency("$250000")).textValue()).isEqualTo("250000.00 USD")
}

@Test
fun `write Amount`() {
val writer = mapper.writer().without(SerializationFeature.INDENT_OUTPUT)
assertEquals("""{"notional":"25000000.00 GBP"}""", writer.writeValueAsString(Dummy(Amount.parseCurrency("£25000000"))))
assertEquals("""{"notional":"250000.00 USD"}""", writer.writeValueAsString(Dummy(Amount.parseCurrency("$250000"))))
fun `Amount(Currency) deserialization`() {
val old = mapOf(
"quantity" to 2500000000,
"token" to "USD"
)
assertThat(mapper.convertValue<Amount<Currency>>(old)).isEqualTo(Amount(2_500_000_000, USD))
assertThat(mapper.convertValue<Amount<Currency>>(TextNode("$25000000"))).isEqualTo(Amount(2_500_000_000, USD))
}

@Test
fun SignedTransaction() {
val attachmentRef = SecureHash.randomSHA256()
doReturn(attachmentRef).whenever(cordappProvider).getContractAttachmentID(DummyContract.PROGRAM_ID)
doReturn(testNetworkParameters()).whenever(services).networkParameters

val writer = mapper.writer()
val stx = makeDummyStx()
val json = writer.writeValueAsString(stx)

val deserializedTransaction = mapper.readValue(json, SignedTransaction::class.java)

assertThat(deserializedTransaction).isEqualTo(stx)
fun ByteSequence() {
val byteSequence: ByteSequence = OpaqueBytes.of(1, 2, 3, 4).subSequence(0, 2)
val json = mapper.valueToTree<BinaryNode>(byteSequence)
assertThat(json.binaryValue()).containsExactly(1, 2)
assertThat(json.asText()).isEqualTo(byteArrayOf(1, 2).toBase64())
assertThat(mapper.convertValue<ByteSequence>(json)).isEqualTo(byteSequence)
}

@Test
@@ -125,6 +124,105 @@ class JacksonSupportTest {
assertThat(mapper.convertValue<OpaqueBytes>(json)).isEqualTo(opaqueBytes)
}

@Test
fun SerializedBytes() {
val data = TestData(BOB_NAME, "Summary", SubTestData(1234))
val serializedBytes = data.serialize()
val json = mapper.valueToTree<ObjectNode>(serializedBytes)
println(mapper.writeValueAsString(json))
assertThat(json["class"].textValue()).isEqualTo(TestData::class.java.name)
assertThat(json["deserialized"].valueAs<TestData>(mapper)).isEqualTo(data)
// Check that the entire JSON object can be converted back to the same SerializedBytes
assertThat(mapper.convertValue<SerializedBytes<*>>(json)).isEqualTo(serializedBytes)
assertThat(mapper.convertValue<SerializedBytes<*>>(BinaryNode(serializedBytes.bytes))).isEqualTo(serializedBytes)
}

// This is the class that was used to serialise the message for the test below. It's commented out so that it's no
// longer on the classpath.
// @CordaSerializable
// data class ClassNotOnClasspath(val name: CordaX500Name, val value: Int)

@Test
fun `SerializedBytes of class not on classpath`() {
// The contents of the file were written out as follows:
// ClassNotOnClasspath(BOB_NAME, 54321).serialize().open().copyTo("build" / "class-not-on-classpath-data")

val serializedBytes = SerializedBytes<Any>(javaClass.getResource("class-not-on-classpath-data").readBytes())
val json = mapper.valueToTree<ObjectNode>(serializedBytes)
println(mapper.writeValueAsString(json))
assertThat(json["class"].textValue()).isEqualTo("net.corda.client.jackson.JacksonSupportTest\$ClassNotOnClasspath")
assertThat(json["deserialized"].valueAs<Map<*, *>>(mapper)).isEqualTo(mapOf(
"name" to BOB_NAME.toString(),
"value" to 54321
))
assertThat(mapper.convertValue<SerializedBytes<*>>(BinaryNode(serializedBytes.bytes))).isEqualTo(serializedBytes)
}

@Test
fun DigitalSignature() {
val digitalSignature = DigitalSignature(secureRandomBytes(128))
val json = mapper.valueToTree<BinaryNode>(digitalSignature)
assertThat(json.binaryValue()).isEqualTo(digitalSignature.bytes)
assertThat(json.asText()).isEqualTo(digitalSignature.bytes.toBase64())
assertThat(mapper.convertValue<DigitalSignature>(json)).isEqualTo(digitalSignature)
}

@Test
fun `DigitalSignature WithKey`() {
val digitalSignature = DigitalSignature.WithKey(BOB_PUBKEY, secureRandomBytes(128))
val json = mapper.valueToTree<ObjectNode>(digitalSignature)
val (by, bytes) = json.assertHasOnlyFields("by", "bytes")
assertThat(by.valueAs<PublicKey>(mapper)).isEqualTo(BOB_PUBKEY)
assertThat(bytes.binaryValue()).isEqualTo(digitalSignature.bytes)
assertThat(mapper.convertValue<DigitalSignature.WithKey>(json)).isEqualTo(digitalSignature)
}

@Test
fun DigitalSignatureWithCert() {
val digitalSignature = DigitalSignatureWithCert(MINI_CORP.identity.certificate, secureRandomBytes(128))
val json = mapper.valueToTree<ObjectNode>(digitalSignature)
val (by, bytes) = json.assertHasOnlyFields("by", "bytes")
assertThat(by.valueAs<X509Certificate>(mapper)).isEqualTo(MINI_CORP.identity.certificate)
assertThat(bytes.binaryValue()).isEqualTo(digitalSignature.bytes)
assertThat(mapper.convertValue<DigitalSignatureWithCert>(json)).isEqualTo(digitalSignature)
}

@Test
fun TransactionSignature() {
val metadata = SignatureMetadata(1, 1)
val transactionSignature = TransactionSignature(secureRandomBytes(128), BOB_PUBKEY, metadata)
val json = mapper.valueToTree<ObjectNode>(transactionSignature)
val (bytes, by, signatureMetadata, partialMerkleTree) = json.assertHasOnlyFields(
"bytes",
"by",
"signatureMetadata",
"partialMerkleTree"
)
assertThat(bytes.binaryValue()).isEqualTo(transactionSignature.bytes)
assertThat(by.valueAs<PublicKey>(mapper)).isEqualTo(BOB_PUBKEY)
assertThat(signatureMetadata.valueAs<SignatureMetadata>(mapper)).isEqualTo(metadata)
assertThat(partialMerkleTree.isNull).isTrue()
assertThat(mapper.convertValue<TransactionSignature>(json)).isEqualTo(transactionSignature)
}

// TODO Add test for PartialMerkleTree

@Test
fun SignedTransaction() {
val attachmentRef = SecureHash.randomSHA256()
doReturn(attachmentRef).whenever(cordappProvider).getContractAttachmentID(DummyContract.PROGRAM_ID)
doReturn(testNetworkParameters()).whenever(services).networkParameters

val stx = makeDummyStx()
val json = mapper.valueToTree<ObjectNode>(stx)
println(mapper.writeValueAsString(json))
val (txBits, signatures) = json.assertHasOnlyFields("txBits", "signatures")
assertThat(txBits.binaryValue()).isEqualTo(stx.txBits.bytes)
val sigs = signatures.elements().asSequence().map { it.valueAs<TransactionSignature>(mapper) }.toList()
assertThat(sigs).isEqualTo(stx.sigs)
assertThat(mapper.convertValue<SignedTransaction>(json)).isEqualTo(stx)
}

@Test
fun CordaX500Name() {
testToStringSerialisation(CordaX500Name(commonName = "COMMON", organisationUnit = "ORG UNIT", organisation = "ORG", locality = "NYC", state = "NY", country = "US"))
@@ -221,31 +319,40 @@ class JacksonSupportTest {

@Test
fun AnonymousParty() {
val anon = AnonymousParty(ALICE_PUBKEY)
val json = mapper.valueToTree<TextNode>(anon)
val anonymousParty = AnonymousParty(ALICE_PUBKEY)
val json = mapper.valueToTree<TextNode>(anonymousParty)
assertThat(json.textValue()).isEqualTo(ALICE_PUBKEY.toBase58String())
assertThat(mapper.convertValue<AnonymousParty>(json)).isEqualTo(anon)
assertThat(mapper.convertValue<AnonymousParty>(json)).isEqualTo(anonymousParty)
}

@Test
fun `PartyAndCertificate serialisation`() {
val json = mapper.valueToTree<ObjectNode>(MINI_CORP.identity)
assertThat(json.fieldNames()).containsOnly("name", "owningKey")
assertThat(mapper.convertValue<CordaX500Name>(json["name"])).isEqualTo(MINI_CORP.name)
assertThat(mapper.convertValue<PublicKey>(json["owningKey"])).isEqualTo(MINI_CORP.publicKey)
val (name, owningKey) = json.assertHasOnlyFields("name", "owningKey")
assertThat(name.valueAs<CordaX500Name>(mapper)).isEqualTo(MINI_CORP.name)
assertThat(owningKey.valueAs<PublicKey>(mapper)).isEqualTo(MINI_CORP.publicKey)
}

@Test
fun `NodeInfo serialisation`() {
val (nodeInfo) = createNodeInfoAndSigned(ALICE_NAME)
val json = mapper.valueToTree<ObjectNode>(nodeInfo)
assertThat(json.fieldNames()).containsOnly("addresses", "legalIdentitiesAndCerts", "platformVersion", "serial")
val address = (json["addresses"] as ArrayNode).also { assertThat(it).hasSize(1) }[0]
assertThat(mapper.convertValue<NetworkHostAndPort>(address)).isEqualTo(nodeInfo.addresses[0])
val identity = (json["legalIdentitiesAndCerts"] as ArrayNode).also { assertThat(it).hasSize(1) }[0]
assertThat(mapper.convertValue<CordaX500Name>(identity["name"])).isEqualTo(ALICE_NAME)
assertThat(mapper.convertValue<Int>(json["platformVersion"])).isEqualTo(nodeInfo.platformVersion)
assertThat(mapper.convertValue<Long>(json["serial"])).isEqualTo(nodeInfo.serial)
val (addresses, legalIdentitiesAndCerts, platformVersion, serial) = json.assertHasOnlyFields(
"addresses",
"legalIdentitiesAndCerts",
"platformVersion",
"serial"
)
addresses.run {
assertThat(this).hasSize(1)
assertThat(this[0].valueAs<NetworkHostAndPort>(mapper)).isEqualTo(nodeInfo.addresses[0])
}
legalIdentitiesAndCerts.run {
assertThat(this).hasSize(1)
assertThat(this[0]["name"].valueAs<CordaX500Name>(mapper)).isEqualTo(ALICE_NAME)
}
assertThat(platformVersion.intValue()).isEqualTo(nodeInfo.platformVersion)
assertThat(serial.longValue()).isEqualTo(nodeInfo.serial)
}

@Test
@@ -274,6 +381,40 @@ class JacksonSupportTest {
assertThat(convertToNodeInfo()).isEqualTo(nodeInfo)
}

@Test
fun CertPath() {
val certPath = MINI_CORP.identity.certPath
val json = mapper.valueToTree<ObjectNode>(certPath)
println(mapper.writeValueAsString(json))
val (type, certificates) = json.assertHasOnlyFields("type", "certificates")
assertThat(type.textValue()).isEqualTo(certPath.type)
certificates.run {
val serialNumbers = elements().asSequence().map { it["serialNumber"].bigIntegerValue() }.toList()
assertThat(serialNumbers).isEqualTo(certPath.x509Certificates.map { it.serialNumber })
}
assertThat(mapper.convertValue<CertPath>(json).encoded).isEqualTo(certPath.encoded)
}

@Test
fun X509Certificate() {
val cert: X509Certificate = MINI_CORP.identity.certificate
val json = mapper.valueToTree<ObjectNode>(cert)
println(mapper.writeValueAsString(json))
assertThat(json["serialNumber"].bigIntegerValue()).isEqualTo(cert.serialNumber)
assertThat(json["issuer"].valueAs<X500Principal>(mapper)).isEqualTo(cert.issuerX500Principal)
assertThat(json["subject"].valueAs<X500Principal>(mapper)).isEqualTo(cert.subjectX500Principal)
assertThat(json["publicKey"].valueAs<PublicKey>(mapper)).isEqualTo(cert.publicKey)
assertThat(json["notAfter"].valueAs<Date>(mapper)).isEqualTo(cert.notAfter)
assertThat(json["notBefore"].valueAs<Date>(mapper)).isEqualTo(cert.notBefore)
assertThat(json["encoded"].binaryValue()).isEqualTo(cert.encoded)
assertThat(mapper.convertValue<X509Certificate>(json).encoded).isEqualTo(cert.encoded)
}

@Test
fun X500Principal() {
testToStringSerialisation(X500Principal("CN=Common,L=London,O=Org,C=UK"))
}

private fun makeDummyStx(): SignedTransaction {
val wtx = DummyContract.generateInitial(1, DUMMY_NOTARY, MINI_CORP.ref(1))
.toWireTransaction(services)
@@ -290,6 +431,17 @@ class JacksonSupportTest {
assertThat(mapper.convertValue<T>(json)).isEqualTo(value)
}

private fun JsonNode.assertHasOnlyFields(vararg fieldNames: String): List<JsonNode> {
assertThat(fieldNames()).containsOnly(*fieldNames)
return fieldNames.map { this[it] }
}

@CordaSerializable
private data class TestData(val name: CordaX500Name, val summary: String, val subData: SubTestData)
|
||||
|
||||
@CordaSerializable
|
||||
private data class SubTestData(val value: Int)
|
||||
|
||||
private class TestPartyObjectMapper : JacksonSupport.PartyObjectMapper {
|
||||
val identities = ArrayList<Party>()
|
||||
val nodes = ArrayList<NodeInfo>()
|
||||
|
@ -60,6 +60,7 @@ public class StandaloneCordaRPCJavaClientTest {
|
||||
true,
|
||||
Collections.singletonList(rpcUser),
|
||||
true,
|
||||
true,
|
||||
Collections.emptyList()
|
||||
);
|
||||
|
||||
|
@ -1,13 +1,16 @@
|
||||
## The following requirements were added by pip freeze:
|
||||
alabaster==0.7.8
|
||||
Babel==2.3.4
|
||||
certifi==2018.4.16
|
||||
chardet==3.0.4
|
||||
CommonMark==0.5.5
|
||||
docutils==0.12
|
||||
future==0.16.0
|
||||
idna==2.6
|
||||
imagesize==0.7.1
|
||||
Jinja2==2.8
|
||||
m2r==0.1.14
|
||||
MarkupSafe==0.23
|
||||
mistune==0.8.3
|
||||
packaging==17.1
|
||||
pdfrw==0.4
|
||||
Pillow==5.1.0
|
||||
|
@ -24,6 +24,13 @@ input {
|
||||
letter-spacing: 0.3px
|
||||
}
|
||||
|
||||
p.caption {
|
||||
margin-top: 2em;
|
||||
}
|
||||
span.caption-text {
|
||||
font-size: larger;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 100%; /* Get rid of RTD rule that assumes nobody changes their browser font size */
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
Building a Corda VM from the AWS Marketplace
|
||||
============================================
|
||||
AWS Marketplace
|
||||
===============
|
||||
|
||||
To help you design, build and test applications on Corda, called CorDapps, a Corda network AMI can be deployed from the `AWS Marketplace <https://aws.amazon.com/marketplace/pp/B077PG9SP5>`__. Instructions on running Corda nodes can be found `here <https://docs.corda.net/deploying-a-node.html>`_.
|
||||
|
||||
@ -12,7 +12,7 @@ Pre-requisites
|
||||
|
||||
|
||||
Deploying a Corda Network
|
||||
---------------------------
|
||||
-------------------------
|
||||
|
||||
Browse to the `AWS Marketplace <https://aws.amazon.com/marketplace>`__ and search for Corda.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
Building a Corda Network on Azure Marketplace
|
||||
=============================================
|
||||
Azure Marketplace
|
||||
=================
|
||||
|
||||
To help you design, build and test applications on Corda, called CorDapps, a Corda network can be deployed on the `Microsoft Azure Marketplace <https://azure.microsoft.com/en-gb/overview/what-is-azure>`_
|
||||
|
||||
|
@ -22,7 +22,10 @@ Unreleased
|
||||
* ``NodeInfo`` objects are serialised as an object and can be looked up using the same mechanism as ``Party``
* ``NetworkHostAndPort`` serialised according to its ``toString()``
* ``PartyAndCertificate`` is serialised as an object containing the name and owning key
* ``SignedTransaction`` can now be serialized to JSON and deserialized back into an object.
* ``SerializedBytes`` is serialised by converting the bytes into the object it represents, which is then serialised into
  a JSON/YAML object
* ``CertPath`` and ``X509Certificate`` are serialised as objects and can be deserialised back
* ``SignedTransaction`` is serialised into its ``txBits`` and ``signatures`` and can be deserialised back (see the sketch
  below)

* Several members of ``JacksonSupport`` have been deprecated to highlight that they are internal and not to be used.

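As an illustration of the ``SignedTransaction`` entries above, the sketch below round-trips a transaction through JSON.
It assumes a ``SignedTransaction`` is already in hand and that ``JacksonSupport.createNonRpcMapper()`` is used as the
mapper factory; treat both as illustrative rather than prescriptive.

.. sourcecode:: kotlin

    import com.fasterxml.jackson.databind.ObjectMapper
    import net.corda.client.jackson.JacksonSupport
    import net.corda.core.transactions.SignedTransaction

    // Round-trip a SignedTransaction through JSON with a Corda-aware mapper.
    // `stx` is assumed to have been obtained elsewhere, e.g. from the vault or a finalised flow.
    fun roundTrip(stx: SignedTransaction): SignedTransaction {
        val mapper: ObjectMapper = JacksonSupport.createNonRpcMapper()
        // The resulting JSON object contains only the "txBits" and "signatures" fields.
        val json: String = mapper.writeValueAsString(stx)
        // Deserialising reconstructs an equal SignedTransaction.
        return mapper.readValue(json, SignedTransaction::class.java)
    }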
@ -25,10 +25,8 @@ import sphinx_rtd_theme
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = ['rst2pdf.pdfbuilder']
|
||||
# m2r is a Markdown to RST converter, as our design docs use Markdown.
|
||||
extensions = ['rst2pdf.pdfbuilder', 'm2r']
|
||||
|
||||
# PDF configuration
|
||||
pdf_documents = [('index', u'corda-developer-site', u'Corda Developer Documentation', u'R3')]
|
||||
@ -41,11 +39,10 @@ templates_path = ['_templates']
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
# You can specify multiple suffix as a list of string:
|
||||
# source_suffix = ['.rst', '.md']
|
||||
source_suffix = '.rst'
|
||||
source_suffix = ['.rst', '.md']
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
@ -1,5 +1,5 @@
|
||||
Corda networks
|
||||
==============
|
||||
Networks
|
||||
========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
@ -1,5 +1,5 @@
|
||||
Corda nodes
|
||||
===========
|
||||
Nodes
|
||||
=====
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
@ -15,7 +15,6 @@ The Corda repository comprises the following folders:
|
||||
* **finance** defines a range of elementary contracts (and associated schemas) and protocols, such as abstract fungible
|
||||
assets, cash, obligation and commercial paper
|
||||
* **gradle** contains the gradle wrapper which you'll use to execute gradle commands
|
||||
* **gradle-plugins** contains some additional plugins which we use to deploy Corda nodes
|
||||
* **lib** contains some dependencies
|
||||
* **node** contains the core code of the Corda node (eg: node driver, node services, messaging, persistence)
|
||||
* **node-api** contains data structures shared between the node and the client module, e.g. types sent via RPC
|
||||
|
@ -10,7 +10,7 @@ These should be written in [Markdown](https://github.com/adam-p/markdown-here/wi
|
||||
|
||||
## Design Review Process
|
||||
|
||||
Please see the [design review process](./designReviewProcess.md).
|
||||
Please see the [design review process](design-review-process.md).
|
||||
|
||||
* Feature request submission
|
||||
* High level design
|
||||
@ -21,7 +21,7 @@ Please see the [design review process](./designReviewProcess.md).
|
||||
|
||||
## Design Template
|
||||
|
||||
Please copy this [directory](./designTemplate) to a new location under `/docs/source/design` (use a meaningful short descriptive directory name) and use the [Design Template](./designTemplate/design.md) contained within to guide writing your Design Proposal. Whilst the section headings may be treated as placeholders for guidance, you are expected to be able to answer any questions related to pertinent section headings (where relevant to your design) at the design review stage. Use the [Design Decision Template](./designTemplate/decisions/decision.md) (as many times as needed) to record the pros and cons, and justification of any design decision recommendations where multiple options are available. These should be directly referenced from the *Design Decisions* section of the main design document.
|
||||
Please copy this [directory](template) to a new location under `/docs/source/design` (use a meaningful short descriptive directory name) and use the [Design Template](template/design.md) contained within to guide writing your Design Proposal. Whilst the section headings may be treated as placeholders for guidance, you are expected to be able to answer any questions related to pertinent section headings (where relevant to your design) at the design review stage. Use the [Design Decision Template](template/decisions/decision.md) (as many times as needed) to record the pros and cons, and justification of any design decision recommendations where multiple options are available. These should be directly referenced from the *Design Decisions* section of the main design document.
|
||||
|
||||
The design document may be completed in one or two iterations, by completing the following two main sections either individually or together:
|
||||
|
||||
|
@ -1,6 +1,3 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Certificate hierarchy levels
|
||||
============================================
|
||||
|
||||
@ -12,7 +9,8 @@ The decision of how many levels to include is a key feature of the [proposed cer
|
||||
|
||||
### Option 1: 2-level hierarchy
|
||||
|
||||
Under this option, intermediate CA certificates for key signing services (Doorman, Network Map, CRL) are generated as direct children of the root certificate.
|
||||
Under this option, intermediate CA certificates for key signing services (Doorman, Network Map, CRL) are generated as
|
||||
direct children of the root certificate.
|
||||
|
||||

|
||||
|
||||
@ -23,11 +21,13 @@ Under this option, intermediate CA certificates for key signing services (Doorma
|
||||
|
||||
#### Disadvantages
|
||||
|
||||
- The Root CA certificate is used to sign both intermediate certificates and CRL. This may be considered as a drawback as the Root CA should be used only to issue other certificates.
|
||||
- The Root CA certificate is used to sign both intermediate certificates and the CRL. This may be considered a drawback
|
||||
as the Root CA should be used only to issue other certificates.
|
||||
|
||||
### Option 2: 3-level hierarchy
|
||||
|
||||
Under this option, an additional 'Company CA' cert is generated from the root CA cert, which is then used to generate intermediate certificates.
|
||||
Under this option, an additional 'Company CA' cert is generated from the root CA cert, which is then used to generate
|
||||
intermediate certificates.
|
||||
|
||||

|
||||
|
||||
@ -44,4 +44,7 @@ Under this option, an additional 'Company CA' cert is generated from the root CA
|
||||
|
||||
Proceed with option 1: 2-level hierarchy.
|
||||
|
||||
No authoritative argument from a security standpoint has been made which would justify the added complexity of option 2. Given the business impact of revoking the Company CA certificate, this must be considered an extremely unlikely event with comparable implications to the revocation of the root certificate itself; hence no practical justification for the addition of the third level is observed.
|
||||
No authoritative argument from a security standpoint has been made which would justify the added complexity of option 2.
|
||||
Given the business impact of revoking the Company CA certificate, this must be considered an extremely unlikely event
|
||||
with comparable implications to the revocation of the root certificate itself; hence no practical justification for the
|
||||
addition of the third level is observed.
|
@ -1,12 +1,10 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Certificate Hierarchy
|
||||
============================================
|
||||
======================================
|
||||
|
||||
## Background / Context
|
||||
|
||||
This document purpose is to make a decision on the certificate hierarchy. It is necessary to make this decision as it affects development of features (e.g. Certificate Revocation List).
|
||||
The purpose of this document is to make a decision on the certificate hierarchy. It is necessary to make this decision as it
|
||||
affects development of features (e.g. Certificate Revocation List).
|
||||
|
||||
## Options Analysis
|
||||
|
84
docs/source/design/certificate-hierarchies/design.md
Normal file
@ -0,0 +1,84 @@
|
||||
# Certificate hierarchies
|
||||
|
||||
.. important:: This design doc applies to the main Corda network. Other networks may use different certificate hierarchies.
|
||||
|
||||
## Overview
|
||||
|
||||
A certificate hierarchy is proposed to enable effective key management in the context of managing Corda networks.
|
||||
This includes certificate usage for the data signing process and certificate revocation process
|
||||
in case of a key compromise. At the same time, the result should remain compliant with
[OCSP](https://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol) and [RFC 5280](https://www.ietf.org/rfc/rfc5280.txt).
|
||||
|
||||
## Background
|
||||
|
||||
Corda utilises public key cryptography for signing and authentication purposes, and securing communication
|
||||
via TLS. As a result, every entity participating in a Corda network owns one or more cryptographic key pairs {*private,
|
||||
public*}. Integrity and authenticity of an entity's public key is assured using digital certificates following the
|
||||
[X.509 standard](https://tools.ietf.org/html/rfc5280), whereby the receiver’s identity is cryptographically bound to
his or her public key.
|
||||
|
||||
Certificate Revocation List (CRL) functionality interacts with the hierarchy of the certificates, as the revocation list
|
||||
for any given certificate must be signed by the certificate's issuer. Therefore if we have a single doorman CA, the sole
|
||||
CRL for node CA certificates would be maintained by that doorman CA, creating a bottleneck. Further, if that doorman CA
|
||||
is compromised and its certificate revoked by the root certificate, the entire network is invalidated as a consequence.
|
||||
|
||||
The current solution of a single intermediate CA is therefore too simplistic.
|
||||
|
||||
Further, the split and location of intermediate CAs has an impact on where long-term infrastructure is hosted, as the CRLs
|
||||
for certificates issued by these CAs must be hosted at the same URI for the lifecycle of the issued certificates.
|
||||
|
||||
## Scope
|
||||
|
||||
Goals:
|
||||
|
||||
* Define effective certificate relationships between participants and Corda network services (i.e. nodes, notaries, network map, doorman).
|
||||
* Enable compliance with both [OCSP](https://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol) and [RFC 5280](https://www.ietf.org/rfc/rfc5280.txt) (CRL)-based revocation mechanisms
|
||||
* Mitigate relevant security risks (keys being compromised, data privacy loss etc.)
|
||||
|
||||
Non-goals:
|
||||
|
||||
* Define an end-state mechanism for certificate revocation.
|
||||
|
||||
## Requirements
|
||||
|
||||
In case of a private key being compromised, or a certificate incorrectly issued, it must be possible for the issuer to
|
||||
revoke the appropriate certificate(s).
|
||||
|
||||
The solution needs to scale, keeping in mind that the list of revoked certificates from any given certificate authority
|
||||
is likely to grow indefinitely. However for an initial deployment a temporary certificate authority may be used, and
|
||||
given that it will not be required to issue certificates in the long term, scaling issues are less of a concern in this
|
||||
context.
|
||||
|
||||
## Design Decisions
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
decisions/levels.md
|
||||
decisions/tls-trust-root.md
|
||||
|
||||
## **Target** Solution
|
||||
|
||||

|
||||
|
||||
The design introduces discrete intermediate CAs below the network trust root for each logical service exposed by the doorman - specifically:
|
||||
|
||||
1. Node CA certificate issuance
|
||||
2. Network map signing
|
||||
3. Certificate Revocation List (CRL) signing
|
||||
4. OCSP revocation signing
|
||||
|
||||
The use of discrete certificates in this way facilitates subsequent changes to the model, including retiring and replacing certificates as needed.
|
||||
|
||||
Each of the above certificates will specify a CRL allowing the certificate to be revoked. The root CA operator
|
||||
(primarily R3) will be required to maintain this CRL for the lifetime of the process.
|
||||
|
||||
TLS certificates will remain issued under Node CA certificates (see [decision: TLS trust
|
||||
root](./decisions/tls-trust-root.html)).
|
||||
|
||||
Nodes will be able to specify CRL(s) for TLS certificates they issue; in general, they will be required to maintain such CRLs for
|
||||
the lifecycle of the TLS certificates.
|
||||
|
||||
In the initial state, a single doorman intermediate CA will be used for issuing all node certificates. Further
|
||||
intermediate CAs for issuance of node CA certificates may subsequently be added to the network, where appropriate,
|
||||
potentially split by geographic region or otherwise.
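As a concrete illustration of what the hierarchy and CRL requirements mean for verifiers, the sketch below validates a
node CA certificate chain back to the network trust root with revocation checking enabled. It uses only standard JCA
classes; the certificate inputs and the assumption that the referenced CRLs are resolvable are illustrative, not part
of this design.

```kotlin
import java.security.cert.*

// Minimal sketch: check a node CA certificate and its issuing doorman CA against the
// network root, with CRL-based revocation checking turned on. All inputs are placeholders.
fun validateChain(nodeCaCert: X509Certificate, doormanCert: X509Certificate, rootCert: X509Certificate) {
    val certFactory = CertificateFactory.getInstance("X.509")
    // Leaf first, then intermediates; the trust anchor is supplied separately via PKIXParameters.
    val certPath = certFactory.generateCertPath(listOf(nodeCaCert, doormanCert))
    val params = PKIXParameters(setOf(TrustAnchor(rootCert, null))).apply {
        // Requires each certificate to reference a reachable CRL, as described above.
        isRevocationEnabled = true
    }
    // Throws CertPathValidatorException if any certificate in the chain is revoked or invalid.
    CertPathValidator.getInstance("PKIX").validate(certPath, params)
}
```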
|
Before Width: | Height: | Size: 142 KiB After Width: | Height: | Size: 142 KiB |
Before Width: | Height: | Size: 175 KiB After Width: | Height: | Size: 175 KiB |
Before Width: | Height: | Size: 309 KiB After Width: | Height: | Size: 309 KiB |
Before Width: | Height: | Size: 349 KiB After Width: | Height: | Size: 349 KiB |
Before Width: | Height: | Size: 353 KiB After Width: | Height: | Size: 353 KiB |
35
docs/source/design/design-review-process.md
Normal file
@ -0,0 +1,35 @@
|
||||
# Design review process
|
||||
|
||||
The Corda design review process defines a means of collaborating on and approving Corda design thinking in a consistent,
|
||||
structured, easily accessible and open manner.
|
||||
|
||||
The process has several steps:
|
||||
|
||||
1. High level discussion with the community and developers on corda-dev.
|
||||
2. Writing a design doc and submitting it for review via a PR to this directory. See other design docs and the
|
||||
design doc template (below).
|
||||
3. Respond to feedback on the github discussion.
|
||||
4. You may be invited to a design review board meeting. This is a video conference in which design may be debated in
|
||||
real time. Notes will be sent afterwards to corda-dev.
|
||||
5. When the design is settled it will be approved and can be merged as normal.
|
||||
|
||||
The following diagram illustrates the process flow:
|
||||
|
||||

|
||||
|
||||
At least some of the following people will take part in a DRB meeting:
|
||||
|
||||
* Richard G Brown (CTO)
|
||||
* James Carlyle (Chief Engineer)
|
||||
* Mike Hearn (Lead Platform Engineer)
|
||||
* Mark Oldfield (Lead Platform Architect)
|
||||
* Jonathan Sartin (Information Security manager)
|
||||
* Select external key contributors (directly involved in design process)
|
||||
|
||||
The Corda Technical Advisory Committee may also be asked to review a design.
|
||||
|
||||
Here's the outline of the design doc template:
|
||||
|
||||
.. toctree::
|
||||
|
||||
template/design.md
|
@ -1,106 +0,0 @@
|
||||
|
||||
# Overview
|
||||
|
||||
The Corda Design Review process defines a means of editing, storing, collaborating, reviewing and approving Corda documentation in a consistent, structured, easily accessible and open manner.
|
||||
|
||||
# Background
|
||||
|
||||
Historically, Corda design documentation has been produced in an ad hoc fashion to include:
|
||||
* Multiple sources and formats of storage
|
||||
* Internal ([Tech/Arch technical discussion](https://r3-cev.atlassian.net/wiki/spaces/AR/pages/2588746/Internal+Technical+Discussion)) and External ([AWG design documents](https://r3-cev.atlassian.net/wiki/spaces/AWG/pages/56623418/Design+Documents)) facing wiki(s)
|
||||
* [Public github wiki](https://github.com/corda/corda/wiki)
|
||||
* [Discourse posts](https://discourse.corda.net/c/corda-discussion)
|
||||
* Multiple authored versions of same design with differing coverage
|
||||
* Elaboration and/or additions to scope
|
||||
* Differing opinions, proposals, suggestions.
|
||||
* Unstructured prose (no consistency in format and structure)
|
||||
* Lack of versioning (wiki documents typically evolve without versioned references)
|
||||
* Lack of traceability (audit) to original requirement(s)
|
||||
* Undefined review and approval process, leading to misunderstandings and open interpretations at time of implementation by platform development team
|
||||
* Lack of proposed implementation plan (time, resources, effort).
|
||||
* Often missing stakeholder collaboration and review in the feedback cycle.
|
||||
|
||||
# Process
|
||||
|
||||
This process specifies:
|
||||
|
||||
1. Usage of a design template to include:
|
||||
* Versioning: design documents can be referenced at a point in time, and evolve from such.
|
||||
* Review and approval history: incorporating relevant stakeholders from R3 (Platform, Product Management, Services) and
|
||||
other relevant review groups (community, partners, customers, key collaborators) as deemed appropriate to the request. Ensure design
|
||||
meets the requirements and is realizable within a proposed implementation timeframe.
|
||||
* Consistent structure and headings: top level headings should be preserved, second level headings provide guidance on
|
||||
content to include, and may be omitted where not relevant.
|
||||
* The design template includes both High Level (conceptual, logical) and Technical (implementation specific) sections.
|
||||
* Design decisions are clearly identified with pros/cons of proposed options, and agreed recommendation.
|
||||
|
||||
2. Document review and approval by relevant stakeholders and impacted parties to include R3 organisational units, such as Platform Engineering, Product Management and Services (where relevant), and key stakeholders, to include customers, partners, key collaborators, and community leaders.
|
||||
* Product owner (originator of requirements)
|
||||
* Design Approval Board (DAB)
|
||||
* Platform Development technical lead (and/or nominated developer(s))
|
||||
* Project Technical Lead / Solution Architect (if originating from an R3 Technical Services project)
|
||||
* Other identified stakeholders (community leaders, partners, customers, key collaborators)
|
||||
|
||||
3. Planning: allocation to Corda (open source) or Enterprise project JIRA epic(s) (and/or set of stories) and prioritisation within Product Backlog for future implementation within a Development Team Sprint.
|
||||
|
||||
4. Document repository locations, according to whether the design is related to Open Source or Enterprise (internal only).
|
||||
The recommended repository source is GitHub, and documents should be stored in [Markdown](https://en.wikipedia.org/wiki/Markdown).
|
||||
The collaboration and review process should follow the standard [GitHub Pull Request](https://confluence.atlassian.com/bitbucket/work-with-pull-requests-223220593.html) mechanism.
|
||||
* [Enterprise Github repository](https://github.com/corda/enterprise)
|
||||
* [Open Source Github repository](https://github.com/corda/corda)
|
||||
|
||||
The following diagram illustrates the process flow:
|
||||
|
||||

|
||||
|
||||
# Review Groups
|
||||
Design documents should include all relevant stakeholders in their distribution (mostly as PR reviewers in github). This will often vary and depend on the origin of the Feature Request, particularly for high level business requirements. Technical Design Documents will tend to include a small set of stakeholders (Design Approval Board, Platform Development, DevOps). Final approval authority lays with at least one member of the Design Approval Board (DAB) or nominated delegate(s).
|
||||
|
||||
Design Approval Board (DAB)
|
||||
* Richard G Brown (CTO)
|
||||
* James Carlyle (Chief Engineer)
|
||||
* Mike Hearn (Lead Platform Engineer)
|
||||
* Mark Oldfield (Lead Platform Architect)
|
||||
* Jonathan Sartin (Information Security manager)
|
||||
* Select external key contributors (directly involved in design process)
|
||||
|
||||
Other review groups inlcude:
|
||||
|
||||
* Product Management
|
||||
|
||||
* Developer Relations
|
||||
|
||||
* Platform Development Team Leads
|
||||
|
||||
(may nominate team members as design leads)
|
||||
|
||||
* DevOps
|
||||
|
||||
* Services – Project (Incubation & Acceleration)
|
||||
|
||||
* Nominated project leads
|
||||
|
||||
Services – Technical (Consulting)
|
||||
* Nominated solution architects
|
||||
|
||||
* External
|
||||
|
||||
* AWG (general)
|
||||
* Consortium members
|
||||
* ISV, SI, Partners
|
||||
* Customers
|
||||
* Key collaborators
|
||||
|
||||
# Applicability and Timing
|
||||
|
||||
This process should be applied to any major feature request gathered by the product management team or lead technologists that has been entered in the product backlog as a requirement, and has been prioritized for imminent execution.
|
||||
|
||||
Publication and distribution of a design document from initial review to full approval will vary on a case by case basis.
|
||||
|
||||
In general,
|
||||
* High Level designs may require a longer approval cycle as they may need to host a formal review meeting with the DAB in attendance,
|
||||
and will typically have larger stakeholder audiences (potentially including external reviewers), thus leading to multiple iterations of revision.
|
||||
In either case the High Level design must be raised as a GitHub PR and obtain formal approval by reviewers.
|
||||
* Technical designs are anticipated to go through a shorter cycle, with immediate feedback via the GitHub PR workflow.
|
||||
Once approved, a Technical Design should be decomposed into a set of implementable Epic/Stories for prioritization and
|
||||
scheduling as part of Development team(s) delivery cycle(s).
|
@ -1,242 +0,0 @@
|
||||

|
||||
|
||||
# Design Template
|
||||
|
||||
Please read the [Design Review Process](../designReviewProcess.md) before completing a design.
|
||||
|
||||
This design template should be used for capturing new Corda feature requests that have been raised as JIRA requirements stories by the product management team. The design may be completed in two stages depending on the complexity and scope of the new feature.
|
||||
|
||||
1. High-level: conceptual designs based on business requirements and/or technical vision. Without detailing implementation, this level of design should position the overall solution within the Corda architecture from a logical perspective (independent from code implementation). It should illustrate and walk through the use case scenarios intended to be satisfied by this new feature request. The design should consider non-functional aspects of the system such as performance, scalability, high availability, security, and operational aspects such as management and monitoring.
|
||||
|
||||
This section of the document should go through a formal review process (eg. presentation of design at meeting and subsequent PR review workflow)
|
||||
|
||||
2. Technical: implementable designs with reference to Corda code. This level of design should focus on API specifications, service definitions, public library additions, data models and schemas, code modularity, configuration, execution and deployment of the new feature. It should also list any new software libraries, frameworks or development approaches to be adopted. The technical design should also consider all aspects of the test lifecycle (unit, integration, smoke tests, performance).
|
||||
|
||||
This section of the document should be raised as a PR for development team review.
|
||||
|
||||
An outcome of the Design Document should be an implementation plan that defines JIRA stories and tasks to be completed to produce shippable, demonstrable, executable code.
|
||||
|
||||
Please complete and/or remove section headings as appropriate to the design being proposed. These are provided as guidance and to structure the design in a consistent and coherent manner.
|
||||
|
||||
DOCUMENT MANAGEMENT
|
||||
---
|
||||
|
||||
Design documents should follow the standard GitHub version management and pull request (PR) review workflow mechanism.
|
||||
|
||||
## Document Control
|
||||
|
||||
| Title | |
|
||||
| -------------------- | ---------------------------------------- |
|
||||
| Date | |
|
||||
| Author | |
|
||||
| Distribution | (see review groups in design review process) |
|
||||
| Corda target version | (enterprise, open source and enterprise) |
|
||||
| JIRA reference | (reference to primary Feature Request JIRA story outlining requirements) |
|
||||
|
||||
## Approvals
|
||||
|
||||
#### Document Sign-off
|
||||
|
||||
| Author | |
|
||||
| ----------------- | ---------------------------------------- |
|
||||
| Reviewer(s) | (GitHub PR reviewers) |
|
||||
| Final approver(s) | (GitHub PR approver(s) from Design Approval Board) |
|
||||
|
||||
#### Design Decisions
|
||||
|
||||
| Description | Recommendation | Approval* |
|
||||
| ---------------------------------------- | --------------- | ----------------------- |
|
||||
| [Design Decision 1](decisions/decision.md) | Selected option | (Design Approval Board) |
|
||||
| [Design Decision 2](decisions/decision.md) | Selected option | (Design Approval Board) |
|
||||
| [Design Decision 3](decisions/decision.md) | Selected option | (Design Approval Board) |
|
||||
|
||||
\* only required for formal Design Approval Board meetings.
|
||||
|
||||
## Document History
|
||||
|
||||
To be managed by GitHub revision control
|
||||
(please use meaningful identifiers when committing a PR approved design to GitHub - eg. my super design V1.0)
|
||||
|
||||
HIGH LEVEL DESIGN
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
General overall of design proposal (goal, objectives, simple outline)
|
||||
|
||||
## Background
|
||||
|
||||
Description of existing solution (if any) and/or rationale for requirement.
|
||||
|
||||
* Reference(s) to discussions held elsewhere (slack, wiki, etc).
|
||||
* Definitions, acronyms and abbreviations
|
||||
|
||||
## Scope
|
||||
|
||||
* Goals
|
||||
* Non-goals (eg. out of scope)
|
||||
* Reference(s) to similar or related work
|
||||
|
||||
## Timeline
|
||||
|
||||
* Is this a short, medium or long-term solution?
|
||||
* Outline timeline expectations
|
||||
|
||||
Eg1. required for Customer Project X by end of Qy'2049)
|
||||
|
||||
Eg2. required to release Enterprise Vx.y (reference roadmap)
|
||||
|
||||
* Where short-term design, is this evolvable / extensible or stop-gap (eg. potentially throwaway)?
|
||||
|
||||
## Requirements
|
||||
|
||||
* Reference(s) to any of following:
|
||||
|
||||
* Captured Product Backlog JIRA entry
|
||||
|
||||
* Internal White Paper feature item and/or visionary feature
|
||||
|
||||
* Project related requirement (POC, RFP, Pilot, Prototype) from
|
||||
|
||||
* Internal Incubator / Accelerator project
|
||||
|
||||
* Direct from Customer, ISV, SI, Partner
|
||||
* Use Cases
|
||||
* Assumptions
|
||||
|
||||
## Design Decisions
|
||||
|
||||
List of design decisions identified in defining the target solution:
|
||||
(for each item, please complete the attached [Design Decision template](decisions/decision.md))
|
||||
|
||||
| Heading (link to completed Decision document using template) | Recommendation |
|
||||
| ---------------------------------------- | -------------- |
|
||||
| [Design Decision 1](decisions/decision.md) | Option A |
|
||||
| [Design Decision 2](decisions/decision.md) | TBD* |
|
||||
| [Design Decision 3](decisions/decision.md) | Option B |
|
||||
|
||||
It is reasonable to expect decisions to be challenged prior to any formal review and approval.
|
||||
*In certain scenarios the Design Decision itself may solicit a recommendation from reviewers.
|
||||
|
||||
## Target Solution
|
||||
|
||||
* Illustrate any business process with diagrams
|
||||
|
||||
* Business Process Flow (or formal BPMN 2.0), swimlane activity
|
||||
|
||||
* UML: activity, state, sequence
|
||||
|
||||
* Illustrate operational solutions with deployment diagrams
|
||||
|
||||
* Network
|
||||
|
||||
* Validation matrix (against requirements)
|
||||
|
||||
* Role, requirement, how design satisfies requirement
|
||||
|
||||
* Sample walk through (against Use Cases)
|
||||
|
||||
* Implications
|
||||
|
||||
* Technical
|
||||
* Operational
|
||||
* Security
|
||||
|
||||
* Adherence to existing industry standards or approaches
|
||||
* List any standards to be followed / adopted
|
||||
* Outstanding issues
|
||||
|
||||
## Complementary solutions
|
||||
|
||||
Other solutions that provide similar functionality and/or overlap with the proposed.
|
||||
Where overlap with existing solution(s), describe how this design fits in and complements the current state.
|
||||
|
||||
## Final recommendation
|
||||
|
||||
* Proposed solution (if more than one option presented)
|
||||
* Proceed direct to implementation
|
||||
* Proceed to Technical Design stage
|
||||
* Proposed Platform Technical team(s) to implement design (if not already decided)
|
||||
|
||||
TECHNICAL DESIGN
|
||||
---
|
||||
|
||||
## Interfaces
|
||||
|
||||
* Public APIs impact
|
||||
* Internal APIs impacted
|
||||
* Modules impacted
|
||||
|
||||
* Illustrate with Software Component diagrams
|
||||
|
||||
## Functional
|
||||
|
||||
* UI requirements
|
||||
|
||||
* Illustrate with UI Mockups and/or Wireframes
|
||||
|
||||
* (Subsystem) Components descriptions and interactions)
|
||||
|
||||
Consider and list existing impacted components and services within Corda:
|
||||
|
||||
* Doorman
|
||||
* Network Map
|
||||
* Public API's (ServiceHub, RPCOps)
|
||||
* Vault
|
||||
* Notaries
|
||||
* Identity services
|
||||
* Flow framework
|
||||
* Attachments
|
||||
* Core data structures, libraries or utilities
|
||||
* Testing frameworks
|
||||
* Pluggable infrastructure: DBs, Message Brokers, LDAP
|
||||
|
||||
* Data model & serialization impact and changes required
|
||||
|
||||
* Illustrate with ERD diagrams
|
||||
|
||||
* Infrastructure services: persistence (schemas), messaging
|
||||
|
||||
## Non-Functional
|
||||
|
||||
* Performance
|
||||
* Scalability
|
||||
* High Availability
|
||||
|
||||
## Operational
|
||||
|
||||
* Deployment
|
||||
|
||||
* Versioning
|
||||
|
||||
* Maintenance
|
||||
|
||||
* Upgradability, migration
|
||||
|
||||
* Management
|
||||
|
||||
* Audit, alerting, monitoring, backup/recovery, archiving
|
||||
|
||||
## Security
|
||||
|
||||
* Data privacy
|
||||
* Authentication
|
||||
* Access control
|
||||
|
||||
## Software Development Tools and Programming Standards to be adopted.
|
||||
|
||||
* languages
|
||||
* frameworks
|
||||
* 3rd party libraries
|
||||
* architectural / design patterns
|
||||
* supporting tools
|
||||
|
||||
## Testability
|
||||
|
||||
* Unit
|
||||
* Integration
|
||||
* Smoke
|
||||
* Non-functional (performance)
|
||||
|
||||
APPENDICES
|
||||
---
|
@ -1,86 +0,0 @@
|
||||

|
||||
|
||||
# Design
|
||||
|
||||
## Document Control
|
||||
|
||||
| Title | Certificate hierarchy |
|
||||
| -------------------- | ---------------------------------------- |
|
||||
| Date | 23/11/2017 |
|
||||
| Author | Michal Kit |
|
||||
| Distribution | Shams Asari, Patrick Kuo, Jonathan Sartin, David Lee, Matthew Nesbit, Konstantinos Chalkias |
|
||||
| Corda target version | Enterprise |
|
||||
| JIRA reference | [ENT-1133](https://r3-cev.atlassian.net/browse/ENT-1133) |
|
||||
|
||||
## Approvals
|
||||
|
||||
#### Document Sign-off
|
||||
|
||||
| Author | |
|
||||
| ----------------- | ---------------------------------------- |
|
||||
| Reviewer(s) | Shams Asari, Patrick Kuo, Jonathan Sartin, David Lee |
|
||||
| Final approver(s) | Mike Hearn |
|
||||
|
||||
HIGH LEVEL DESIGN
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
A certificate hierarchy is proposed to enable effective key management in the context of managing Corda networks, primarily Corda Connect. This includes certificate usage for the data signing process and certificate revocation process in case of a security leak. At the same time, result should remain compliant with standards: [OCSP](https://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol), [RFC 5280](https://www.ietf.org/rfc/rfc5280.txt)
|
||||
|
||||
## Background
|
||||
|
||||
Corda utilises public key (asymmetric) cryptography for signing and authentication purposes, and securing communication via TLS. As a result, every entity participating in a Corda network owns one or more cryptographic key pairs {*private, public*}. Integrity and authenticity of an entity's public key is assured using digital certificates following the [X.509 standard](https://tools.ietf.org/html/rfc5280), whereby the receiver’s identity is cryptographically bonded to his or her public key.
|
||||
|
||||
Certificate Revocation List (CRL) functionality interacts with the hierarchy of the certificates, as the revocation list for any given certificate must be signed by the certificate's issuer. Therefore if we have a single doorman CA, the sole CRL for node CA certificates would be maintained by that doorman CA, creating a bottleneck. Further, if that doorman CA is compromised and its certificate revoked by the root certificate, the entire network is invalidated as a consequence.
|
||||
|
||||
The current solution of a single intermediate CA is therefore too simplistic.
|
||||
|
||||
Further, the split and location of intermediate CAs has impact on where long term infrastructure is hosted, as the CRLs for certificates issued by these CAs must be hosted at the same URI for the lifecycle of the issued certificates.
|
||||
|
||||
## Scope
|
||||
|
||||
Goals:
|
||||
|
||||
* Define effective certificate relationships between participants and Corda network services (i.e. nodes, notaries, network map, doorman).
|
||||
* Enable compliance with both [OCSP](https://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol) and [RFC 5280](https://www.ietf.org/rfc/rfc5280.txt) (CRL)-based revocation mechanisms
|
||||
* Mitigate relevant security risks (keys being compromised, data privacy loss etc.)
|
||||
|
||||
Non-goals:
|
||||
|
||||
- Define an end-state mechanism for certificate revocation
|
||||
|
||||
## Requirements
|
||||
|
||||
In case of a private key being compromised, or a certificate incorrectly issued, it must be possible for the issuer to revoke the appropriate certificate(s).
|
||||
|
||||
The solution needs to scale, keeping in mind that the list of revoked certificates from any given certificate authority is likely to grow indefinitely. However for an initial deployment a temporary certificate authority may be used, and given that it will not require to issue certificates in the long term, scaling issues are less of a concern in this context.
|
||||
|
||||
|
||||
## Design Decisions
|
||||
|
||||
| Heading (link to completed Decision document using template) | Recommendation |
|
||||
| ---------------------------------------- | ---------------------------- |
|
||||
| [Hierarchy levels](./decisions/levels.md) | Option 1 - 2-level hierarchy |
|
||||
| [TLS trust root](./decisions/tls-trust-root.md) | Option 1 - Single trust root |
|
||||
|
||||
## **Target** Solution
|
||||
|
||||

|
||||
|
||||
The design introduces discrete intermediate CAs below the network trust root for each logical service exposed by the doorman - specifically:
|
||||
|
||||
1. Node CA certificate issuance
|
||||
2. Network map signing
|
||||
3. Certificate Revocation List (CRL) signing
|
||||
4. OCSP revocation signing
|
||||
|
||||
The use of discrete certificates in this way facilitates subsequent changes to the model, including retiring and replacing certificates as needed.
|
||||
|
||||
Each of the above certificates will specify a CRL allowing the certificate to be revoked. The root CA operator (primarily R3) will be required to maintain this CRL for the lifetime of the process.
|
||||
|
||||
TLS certificates will remain issued under Node CA certificates (see [decision: TLS trust root](./decisions/tls-trust-root.md)).
|
||||
|
||||
Nodes will be able to specify CRL(s) for TLS certificates they issue; in general, they will be required to such CRLs for the lifecycle of the TLS certificates.
|
||||
|
||||
In the initial state, a single Doorman intermediate CA will be used for issuing all node certificates. Further intermediate CAs for issuance of node CA certificates may subsequently be added to the network, where appropriate, potentially split by geographic region or otherwise.
|
Before Width: | Height: | Size: 85 KiB After Width: | Height: | Size: 85 KiB |
@ -1,24 +1,6 @@
|
||||

|
||||
# Failure detection and master election
|
||||
|
||||
# Failure detection and master election: design proposal
|
||||
|
||||
-------------------
|
||||
DOCUMENT MANAGEMENT
|
||||
===================
|
||||
|
||||
## Document Control
|
||||
|
||||
* Failure detection and master election: design proposal
|
||||
* Date: 23rd January 2018
|
||||
* Author: Bogdan Paunescu
|
||||
* Distribution: Design Review Board, Product Management, DevOps, Services - Technical (Consulting)
|
||||
* Corda target version: Enterprise
|
||||
|
||||
## Document History
|
||||
|
||||
--------------------------------------------
|
||||
HIGH LEVEL DESIGN
|
||||
============================================
|
||||
.. important:: This design document describes a feature of Corda Enterprise.
|
||||
|
||||
## Background
|
||||
|
||||
@ -31,7 +13,8 @@ This document proposes two solutions to the above mentioned issues. The strength
|
||||
|
||||
## Constraints/Requirements
|
||||
|
||||
Typical modern HA environments rely on a majority quorum of the cluster to be alive and operating normally in order to service requests. This means:
|
||||
Typical modern HA environments rely on a majority quorum of the cluster to be alive and operating normally in order to
|
||||
service requests. This means:
|
||||
|
||||
* A cluster of 1 replica can tolerate 0 failures
|
||||
* A cluster of 2 replicas can tolerate 0 failures
|
||||
@ -39,23 +22,41 @@ Typical modern HA environments rely on a majority quorum of the cluster to be al
|
||||
* A cluster of 4 replicas can tolerate 1 failure
|
||||
* A cluster of 5 replicas can tolerate 2 failures
|
||||
|
||||
This already poses a challenge to us as clients will most likely want to deploy the minimum possible number of R3 Corda nodes. Ideally that minimum would be 3 but a solution for only 2 nodes should be available (even if it provides a lesser degree of HA than 3, 5 or more nodes). The problem with having only two nodes in the cluster is there is no distinction between failure and network partition.
|
||||
This already poses a challenge to us as clients will most likely want to deploy the minimum possible number of R3 Corda
|
||||
nodes. Ideally that minimum would be 3 but a solution for only 2 nodes should be available (even if it provides a lesser
|
||||
degree of HA than 3, 5 or more nodes). The problem with having only two nodes in the cluster is there is no distinction
|
||||
between failure and network partition.
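For reference, the sizes listed above follow the usual majority-quorum rule: a cluster of `n` replicas tolerates
`floor((n - 1) / 2)` failures. The small helper below simply restates that arithmetic and is purely illustrative.

```kotlin
// Majority-quorum arithmetic: how many replica failures a cluster of the given size can tolerate,
// and the smallest cluster that can tolerate a desired number of failures.
fun toleratedFailures(clusterSize: Int): Int = (clusterSize - 1) / 2

fun minimumClusterSize(failuresToTolerate: Int): Int = 2 * failuresToTolerate + 1
```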
|
||||
|
||||
Users should be allowed to set a preference for which node to be active in a hot-warm environment. This would probably be done with the help of a property(persisted in the DB in order to be changed on the fly). This is an important functionality as users might want to have the active node on better hardware and switch to the back-ups and back as soon as possible.
|
||||
Users should be allowed to set a preference for which node to be active in a hot-warm environment. This would probably
be done with the help of a property (persisted in the DB in order to be changed on the fly). This is an important
functionality as users might want to have the active node on better hardware and switch to the back-ups and back as soon
as possible.
|
||||
|
||||
It would also be helpful for the chosen solution to not add deployment complexity.
|
||||
|
||||
## Design decisions
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
drb-meeting-20180131.md
|
||||
|
||||
## Proposed solutions
|
||||
|
||||
Based on what is needed for Hot-Warm, 1 active node and at least one passive node (started but in stand-by mode), and the constraints identified above (automatic failover with at least 2 nodes and master preference), two frameworks have been explored: Zookeeper and Atomix. Neither apply to our use cases perfectly and require some tinkering to solve our issues, especially the preferred master election.
|
||||
Based on what is needed for Hot-Warm, 1 active node and at least one passive node (started but in stand-by mode), and
|
||||
the constraints identified above (automatic failover with at least 2 nodes and master preference), two frameworks have
|
||||
been explored: Zookeeper and Atomix. Neither apply to our use cases perfectly and require some tinkering to solve our
|
||||
issues, especially the preferred master election.
|
||||
|
||||
### Zookeeper
|
||||
|
||||

|
||||

|
||||
|
||||
Preferred leader election - while the default algorithm does not take into account a leader preference, a custom algorithm can be implemented to suit our needs.
|
||||
Preferred leader election - while the default algorithm does not take into account a leader preference, a custom
|
||||
algorithm can be implemented to suit our needs.
|
||||
|
||||
Environment with 2 nodes - while this type of set-up can't distinguish between a node failure and network partition, a workaround can be implemented by having 2 nodes and 3 zookeeper instances(3rd would be needed to form a majority).
|
||||
Environment with 2 nodes - while this type of set-up can't distinguish between a node failure and network partition, a
|
||||
workaround can be implemented by having 2 nodes and 3 Zookeeper instances (a 3rd would be needed to form a majority).
|
||||
|
||||
Pros:
|
||||
- Very well documented
|
||||
@ -69,11 +70,12 @@ Cons:
|
||||
|
||||
### Atomix
|
||||
|
||||

|
||||

|
||||
|
||||
Preferred leader election - cannot be implemented easily; a creative solution would be required.
|
||||
|
||||
Environment with 2 nodes - using only embedded replicas, there's no solution; Atomix comes also as a standalone server which could be run outside the node as a 3rd entity to allow a quorum(see image above).
|
||||
Environment with 2 nodes - using only embedded replicas, there's no solution; Atomix comes also as a standalone server
|
||||
which could be run outside the node as a 3rd entity to allow a quorum (see image above).
|
||||
|
||||
Pros:
|
||||
- Easy to get started with
|
||||
@ -87,9 +89,14 @@ Cons:
|
||||
|
||||
## Recommendations
|
||||
|
||||
If Zookeeper is chosen, we would need to look into a solution for easy configuration and deployment (maybe docker images). Custom leader election can be implemented by following one of the [examples](https://github.com/SainTechnologySolutions/allprogrammingtutorials/tree/master/apache-zookeeper/leader-election) available online.
|
||||
If Zookeeper is chosen, we would need to look into a solution for easy configuration and deployment (maybe docker
|
||||
images). Custom leader election can be implemented by following one of the
|
||||
[examples](https://github.com/SainTechnologySolutions/allprogrammingtutorials/tree/master/apache-zookeeper/leader-election)
|
||||
available online.
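To make the shape of such a custom election concrete, the sketch below shows a node joining a leader election with
Apache Curator's `LeaderLatch`. The connection string, latch path and callbacks are illustrative only, and the latch as
shown does not implement the preferred-master behaviour discussed above; that would still require a custom recipe.

```kotlin
import org.apache.curator.framework.CuratorFrameworkFactory
import org.apache.curator.framework.recipes.leader.LeaderLatch
import org.apache.curator.framework.recipes.leader.LeaderLatchListener
import org.apache.curator.retry.ExponentialBackoffRetry

// Illustrative only: every node joins the same latch path; Curator elects one leader and
// re-runs the election automatically if the current leader's Zookeeper session is lost.
fun startElection(connectString: String, nodeId: String, onActive: () -> Unit, onPassive: () -> Unit): LeaderLatch {
    val client = CuratorFrameworkFactory.newClient(connectString, ExponentialBackoffRetry(1000, 3))
    client.start()
    val latch = LeaderLatch(client, "/corda/leader", nodeId)
    latch.addListener(object : LeaderLatchListener {
        override fun isLeader() = onActive()   // become the hot (active) node
        override fun notLeader() = onPassive() // fall back to warm (stand-by) mode
    })
    latch.start()
    return latch
}
```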
|
||||
|
||||
If Atomix is chosen, a solution to enforce some sort of preferred leader needs to found. One way to do it would be to have the Corda cluster leader be a separate entity from the Atomix cluster leader. Implementing the election would then be done using the distributed resources made available by the framework.
|
||||
If Atomix is chosen, a solution to enforce some sort of preferred leader needs to be found. One way to do it would be to
have the Corda cluster leader be a separate entity from the Atomix cluster leader. Implementing the election would then
be done using the distributed resources made available by the framework.
|
||||
|
||||
## Conclusions
|
||||
|
||||
@ -97,9 +104,15 @@ Whichever solution is chosen, using 2 nodes in a Hot-Warm environment is not ide
|
||||
|
||||
Almost every configuration option that these frameworks offer should be exposed through node.conf.
|
||||
|
||||
We've looked into using Galera which is currently used for the Notary cluster for storing the committed state hashes. It offers multi-master read/write and certification-based replication which is not leader based. It could be used to implement automatic failure detection and master election(similar to our current mutual exclusion).However, we found that it doesn't suit our needs because:
|
||||
We've looked into using Galera, which is currently used for the notary cluster for storing the committed state hashes. It
offers multi-master read/write and certification-based replication which is not leader based. It could be used to
implement automatic failure detection and master election (similar to our current mutual exclusion). However, we found
that it doesn't suit our needs because:
|
||||
|
||||
- it adds to deployment complexity
|
||||
- usable only with MySQL and InnoDB storage engine
|
||||
- we'd have to implement node failure detection and master election from scratch; in this regard both Atomix and Zookeeper are better suited
|
||||
|
||||
Our preference would be Zookeeper despite not being as lightweight and deployment-friendly as Atomix. The wide spread use, proper documentation and flexibility to use it not only for automatic failover and master election but also configuration management(something we might consider moving forward) makes it a better fit for our needs.
|
||||
Our preference would be Zookeeper despite not being as lightweight and deployment-friendly as Atomix. The widespread
use, proper documentation and flexibility to use it not only for automatic failover and master election but also
configuration management (something we might consider moving forward) makes it a better fit for our needs.
|
@ -1,8 +1,4 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Review Board Meeting Minutes
|
||||
============================================
|
||||
# Design Review Board Meeting Minutes
|
||||
|
||||
**Date / Time:** Jan 31 2018, 11.00
|
||||
|
||||
@ -45,33 +41,54 @@ MN presented a high level summary of the options:
|
||||
Wrapper library choice for Zookeeper requires some analysis
|
||||
|
||||
|
||||
MH: predictable source of API for RAFT implementations and Zookeeper compared to Atomix. Be better to have master selector implemented as an abstraction
|
||||
MH: predictable source of API for RAFT implementations and Zookeeper compared to Atomix. Be better to have master
|
||||
selector implemented as an abstraction
|
||||
|
||||
MH: hybrid approach possible - 3rd node for oversight, i.e. 2 embedded in the node, 3rd is an observer. Zookeeper can have one node in primary data centre, one in secondary data centre and 3rd as tie-breaker
|
||||
MH: hybrid approach possible - 3rd node for oversight, i.e. 2 embedded in the node, 3rd is an observer. Zookeeper can
|
||||
have one node in primary data centre, one in secondary data centre and 3rd as tie-breaker
|
||||
|
||||
WN: why are we concerned about cost of 3 machines? MN: we're seeing / hearing clients wanting to run many nodes on one VM. Zookeeper is good for this since 1 Zookepper cluster can serve 100+ nodes
|
||||
WN: why are we concerned about cost of 3 machines? MN: we're seeing / hearing clients wanting to run many nodes on one
|
||||
VM. Zookeeper is good for this since one Zookeeper cluster can serve 100+ nodes
|
||||
|
||||
MH: terminology clarification required: what holds the master lock? Ideally would be good to see design thinking around split node and which bits need HA. MB: as a long term vision, ideally have 1 database for many IDs and the flows for those IDs are load balanced. Regarding services internally to node being suspended, this is being investigated.
|
||||
MH: terminology clarification required: what holds the master lock? Ideally would be good to see design thinking around
|
||||
split node and which bits need HA. MB: as a long term vision, ideally have 1 database for many IDs and the flows for
|
||||
those IDs are load balanced. Regarding services internally to node being suspended, this is being investigated.
|
||||
|
||||
MH: regarding auto failover, in the event a database has its own perception of master and slave, how is this handled? Failure detector will need to grow or have local only schedule to confirm it is processing everything including connectivity between database and bus, i.e. implement a 'healthiness' concept
|
||||
MH: regarding auto failover, in the event a database has its own perception of master and slave, how is this handled?
|
||||
Failure detector will need to grow or have local only schedule to confirm it is processing everything including
|
||||
connectivity between database and bus, i.e. implement a 'healthiness' concept
|
||||
|
||||
MH: can you get into a situation where the node fails over but the database does not, but database traffic continues to be sent to down node? MB: database will go offline leading to an all-stop event.
|
||||
MH: can you get into a situation where the node fails over but the database does not, but database traffic continues to
|
||||
be sent to down node? MB: database will go offline leading to an all-stop event.
|
||||
|
||||
MH: can you have master affinity between node and database? MH: need watchdog / heartbeat solutions to confirm state of all components
|
||||
MH: can you have master affinity between node and database? MH: need watchdog / heartbeat solutions to confirm state of
|
||||
all components
|
||||
|
||||
JC: how long will this solution live? MB: will work for hot / hot flow sharding, multiple flow workers and soft locks, then this is long term solution. Service abstraction will be used so we are not wedded to Zookeeper however the abstraction work can be done later
|
||||
JC: how long will this solution live? MB: will work for hot / hot flow sharding, multiple flow workers and soft locks,
|
||||
then this is long term solution. Service abstraction will be used so we are not wedded to Zookeeper however the
|
||||
abstraction work can be done later
|
||||
|
||||
JC: does the implementation with Zookeeper have an impact on whether cloud or physical deployments are used? MB: it's an internal component, not part of the larger Corda network, therefore it can be either. For the customer they will have to deploy a separate Zookeeper solution, but this is the same for Atomix.
|
||||
JC: does the implementation with Zookeeper have an impact on whether cloud or physical deployments are used? MB: it's an
|
||||
internal component, not part of the larger Corda network therefore can be either. For the customer they will have to
|
||||
deploy a separate Zookeeper solution, but this is the same for Atomix.
|
||||
|
||||
WN: where Corda as a service is being deployed with many nodes in the cloud, Zookeeper will be better suited to big providers.
|
||||
WN: where Corda as a service is being deployed with many nodes in the cloud, Zookeeper will be better suited to big
|
||||
providers.
|
||||
|
||||
WN: concern is the customer expects to get everything on a plate, therefore will need to be educated on how to implement Zookeeper, but this is the same for other master selection solutions.
|
||||
WN: concern is the customer expects to get everything on a plate, therefore will need to be educated on how to implement
|
||||
Zookeeper, but this is the same for other master selection solutions.
|
||||
|
||||
JC: is it possible to launch R3 Corda with a button on Azure marketplace to commission a Zookeeper? Yes, if we can resource it. But expectation is Zookeeper will be used by well-informed clients / implementers so one-click option is less relevant.
|
||||
JC: is it possible to launch R3 Corda with a button on Azure marketplace to commission a Zookeeper? Yes, if we can
|
||||
resource it. But expectation is Zookeeper will be used by well-informed clients / implementers so one-click option is
|
||||
less relevant.
|
||||
|
||||
MH: how does failover work with HSMs? MB: can replicate realm so failover is trivial
|
||||
MH: how does failover work with HSMs?
|
||||
|
||||
JC: how do we document Enterprise features? Publish design docs? Enterprise fact sheets? R3 Corda marketing material? Clear separation of documentation is required. GT: this is already achieved by having docs.corda.net for open source Corda and docs.corda.r3.com for enterprise R3 Corda
|
||||
MN: can replicate realm so failover is trivial
|
||||
|
||||
JC: how do we document Enterprise features? Publish design docs? Enterprise fact sheets? R3 Corda marketing material?
|
||||
Clear separation of documentation is required. GT: this is already achieved by having docs.corda.net for open source
|
||||
Corda and docs.corda.r3.com for enterprise R3 Corda
|
||||
|
||||
|
||||
### Next Steps
|
||||
|
Before Width: | Height: | Size: 100 KiB After Width: | Height: | Size: 100 KiB |
@ -1,13 +1,7 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Review Board Meeting Minutes
|
||||
============================================
|
||||
# Design Review Board Meeting Minutes
|
||||
|
||||
**Date / Time:** 16/11/2017, 14:00
|
||||
|
||||
|
||||
|
||||
## Attendees
|
||||
|
||||
- Mark Oldfield (MO)
|
||||
@ -24,9 +18,7 @@ Design Review Board Meeting Minutes
|
||||
- Jonathan Sartin (JS)
|
||||
- David Lee (DL)
|
||||
|
||||
|
||||
|
||||
## **Minutes**
|
||||
## Minutes
|
||||
|
||||
MO opened the meeting, outlining the agenda and meeting review process, and clarifying that consensus on each design decision would be sought from RGB, JC and MH.
|
||||
|
||||
@ -90,7 +82,7 @@ MN highlighted the link to AMQP serialisation work being done.
|
||||
|
||||
**DECISION CONFIRMED:** Add placeholder, subject to more detailed design proposal (RGB, JC, MH agreed)
|
||||
|
||||
### **[AMQP vs. custom protocol](./p2p-protocol.md) **
|
||||
### [AMQP vs. custom protocol](./p2p-protocol.md)
|
||||
|
||||
MN described alternative options involving onion-routing etc.
|
||||
|
||||
@ -110,7 +102,7 @@ RGB queried whether full AMQP implementation should be done in this phase. MN pr
|
||||
|
||||
**DECISION CONFIRMED:** Continue to use AMQP (RGB, JC, MH agreed)
|
||||
|
||||
### [Pluggable broker prioritisation](./pluggable-broker.md)
|
||||
### [Pluggable broker prioritisation](./pluggable-broker.md)
|
||||
|
||||
MN outlined arguments for deferring pluggable brokers, whilst describing how he’d go about implementing the functionality. MH agreed with prioritisation for later.
|
||||
|
||||
@ -124,7 +116,7 @@ AB noted Solace have functionality with conceptual similarities to the float, an
|
||||
|
||||
**DECISION CONFIRMED:** Defer support for pluggable brokers until later, except in the event that a requirement to do so emerges from higher priority float / HA work. (RGB, JC, MH agreed)
|
||||
|
||||
### **Inbound only vs. inbound & outbound connections**
|
||||
### Inbound only vs. inbound & outbound connections
|
||||
|
||||
DL sought confirmation that the group was happy with the float to act as a Listener only. MN repeated the explanation of how outbound connections would be initiated through a SOCKS 4/5 proxy. No objections were raised.
|
||||
|
||||
|
@ -1,15 +1,9 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: End-to-end encryption
|
||||
============================================
|
||||
# Design Decision: End-to-end encryption
|
||||
|
||||
## Background / Context
|
||||
|
||||
End-to-end encryption is a desirable potential design feature for the [float](../design.md).
|
||||
|
||||
|
||||
|
||||
## Options Analysis
|
||||
|
||||
### 1. No end-to-end encryption
|
||||
@ -53,4 +47,9 @@ Proceed with Option 2: Placeholder
|
||||
|
||||
## Decision taken
|
||||
|
||||
[DNB Meeting, 16/11/2017](./drb-meeting-20171116.md): Proceed with Option 2 - Add placeholder, subject to more detailed design proposal (RGB, JC, MH agreed)
|
||||
Proceed with Option 2 - Add placeholder, subject to more detailed design proposal (RGB, JC, MH agreed)
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
||||
|
@ -1,8 +1,4 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: P2P Messaging Protocol
|
||||
============================================
|
||||
# Design Decision: P2P Messaging Protocol
|
||||
|
||||
## Background / Context
|
||||
|
||||
@ -10,8 +6,6 @@ Corda requires messages to be exchanged between nodes via a well-defined protoco
|
||||
|
||||
Determining this protocol is a critical upstream dependency for the design of key messaging components including the [float](../design.md).
|
||||
|
||||
|
||||
|
||||
## Options Analysis
|
||||
|
||||
### 1. Use AMQP
|
||||
@ -20,32 +14,46 @@ Under this option, P2P messaging will follow the [Advanced Message Queuing Proto
|
||||
|
||||
#### Advantages
|
||||
|
||||
1. As we have described in our marketing materials.
|
||||
2. Well-defined standard.
|
||||
3. Support for packet-level flow control and explicit delivery acknowledgement.
|
||||
4. Will allow eventual swap out of Artemis for other brokers.
|
||||
1. As we have described in our marketing materials.
|
||||
2. Well-defined standard.
|
||||
3. Support for packet-level flow control and explicit delivery acknowledgement.
|
||||
4. Will allow eventual swap out of Artemis for other brokers.
|
||||
|
||||
#### Disadvantages
|
||||
|
||||
1. AMQP is a complex protocol with many layered state machines, for which it may prove hard to verify security properties.
|
||||
2. No support for secure MAC in packets frames.
|
||||
3. No defined encryption mode beyond creating custom payload encryption and custom headers.
|
||||
4. No standardised support for queue creation/enumeration, or deletion.
|
||||
5. Use of broker durable queues and autonomous bridge transfers does not align with checkpoint timing, so that independent replication of the DB and Artemis data risks causing problems. (Writing to the DB doesn’t work currently and is probably also slow).
|
||||
1. AMQP is a complex protocol with many layered state machines, for which it may prove hard to verify security properties.
|
||||
2. No support for secure MAC in packets frames.
|
||||
3. No defined encryption mode beyond creating custom payload encryption and custom headers.
|
||||
4. No standardised support for queue creation/enumeration, or deletion.
|
||||
5. Use of broker durable queues and autonomous bridge transfers does not align with checkpoint timing, so that independent replication of the DB and Artemis data risks causing problems. (Writing to the DB doesn’t work currently and is probably also slow).
|
||||
|
||||
### 2. Develop a custom protocol
|
||||
|
||||
This option would discard existing Artemis server/AMQP support for peer-to-peer communications in favour of a custom implementation of the Corda MessagingService, which takes direct responsibility for message retries and stores the pending messages into the node's database. The wire level of this service would be built on top of a fully encrypted MIX network which would not require a fully connected graph, but rather send messages on randomly selected paths over the dynamically managed network graph topology.
|
||||
This option would discard existing Artemis server/AMQP support for peer-to-peer communications in favour of a custom
|
||||
implementation of the Corda MessagingService, which takes direct responsibility for message retries and stores the
|
||||
pending messages into the node's database. The wire level of this service would be built on top of a fully encrypted MIX
|
||||
network which would not require a fully connected graph, but rather send messages on randomly selected paths over the
|
||||
dynamically managed network graph topology.
|
||||
|
||||
Packet format would likely use the  although with the body encryption updated to a modern AEAD scheme as in https://www.cs.ru.nl/~bmennink/pubs/16cans.pdf . In this scheme, nodes would be identified in the overlay network solely by Curve25519 public key addresses and floats would be dumb nodes that only run the MIX network code and don't act as message sources, or sinks. Intermediate traffic would not be readable except by the intended waypoint and only the final node can read the payload.
|
||||
Packet format would likely use the [SPHINX packet format](http://www0.cs.ucl.ac.uk/staff/G.Danezis/papers/sphinx-eprint.pdf) although with the body encryption updated to
|
||||
a modern AEAD scheme as in https://www.cs.ru.nl/~bmennink/pubs/16cans.pdf . In this scheme, nodes would be identified in
|
||||
the overlay network solely by Curve25519 public key addresses and floats would be dumb nodes that only run the MIX
|
||||
network code and don't act as message sources, or sinks. Intermediate traffic would not be readable except by the
|
||||
intended waypoint and only the final node can read the payload.
|
||||
|
||||
Point to point links would be standard TLS and the network certificates would be whatever is acceptable to the host institutions e.g. standard Verisign certs. It is assumed institutions would select partners to connect to that they trust and permission them individually in their firewalls. Inside the MIX network the nodes would be connected mostly in a static way and use standard HELLO packets to determine the liveness of neighbour routes, then use tunnelled gossip to distribute the signed/versioned Link topology messages. Nodes will also be allowed to advertise a public IP, so some dynamic links and publicly visible nodes would exist. Network map addresses would then be mappings from Legal Identity to these overlay network addresses, not to physical network locations.
|
||||
Point to point links would be standard TLS and the network certificates would be whatever is acceptable to the host
|
||||
institutions e.g. standard Verisign certs. It is assumed institutions would select partners to connect to that they
|
||||
trust and permission them individually in their firewalls. Inside the MIX network the nodes would be connected mostly in
|
||||
a static way and use standard HELLO packets to determine the liveness of neighbour routes, then use tunnelled gossip to
|
||||
distribute the signed/versioned Link topology messages. Nodes will also be allowed to advertise a public IP, so some
|
||||
dynamic links and publicly visible nodes would exist. Network map addresses would then be mappings from Legal Identity
|
||||
to these overlay network addresses, not to physical network locations.
|
||||
|
||||
#### Advantages
|
||||
|
||||
1. Can be defined with very small message surface area that is amenable to security analysis.
|
||||
2. Packet formats can follow best practice cryptography from the start and be matched to Corda’s needs.
|
||||
3. Doesn’t require ‘Complete Graph’ structure for network if we have intermediate routing.
|
||||
3. Doesn’t require a complete graph structure for network if we have intermediate routing.
|
||||
4. More closely aligns checkpointing and message delivery handling at the application level.
|
||||
|
||||
#### Disadvantages
|
||||
@ -54,16 +62,14 @@ Point to point links would be standard TLS and the network certificates would be
|
||||
2. Effort implications - starting from scratch
|
||||
3. Technical complexity in developing a P2P protocol which is attack-tolerant.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Recommendation and justification
|
||||
|
||||
Proceed with Option 1
|
||||
|
||||
|
||||
|
||||
## Decision taken
|
||||
|
||||
[DNB Meeting, 16/11/2017](./drb-meeting-20171116.md): Proceed with Option 1 - Continue to use AMQP (RGB, JC, MH agreed)
|
||||
Proceed with Option 1 - Continue to use AMQP (RGB, JC, MH agreed)
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
@ -1,14 +1,9 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Pluggable Broker prioritisation
|
||||
============================================
|
||||
# Design Decision: Pluggable Broker prioritisation
|
||||
|
||||
## Background / Context
|
||||
|
||||
A decision on when to prioritise implementation of a pluggable broker has implications for delivery of key messaging components including the [float](../design.md).
|
||||
|
||||
|
||||
A decision on when to prioritise implementation of a pluggable broker has implications for delivery of key messaging
|
||||
components including the [float](../design.md).
|
||||
|
||||
## Options Analysis
|
||||
|
||||
@ -58,8 +53,10 @@ A decision on when to prioritise implementation of a pluggable broker has implic
|
||||
|
||||
Proceed with Option 2 (defer development of pluggable brokers until later)
|
||||
|
||||
|
||||
|
||||
## Decision taken
|
||||
|
||||
[DNB Meeting, 16/11/2017](./drb-meeting-20171116.md): Proceed with Option 2- Defer support for pluggable brokers until later, except in the event that a requirement to do so emerges from higher priority float / HA work. (RGB, JC, MH agreed)
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
||||
Proceed with Option 2 - Defer support for pluggable brokers until later, except in the event that a requirement to do so emerges from higher priority float / HA work. (RGB, JC, MH agreed)
|
||||
|
@ -1,21 +1,14 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: TLS termination point
|
||||
============================================
|
||||
# Design Decision: TLS termination point
|
||||
|
||||
## Background / Context
|
||||
|
||||
Design of the [float](../design.md) is critically influenced by the decision of where TLS connections to the node should be terminated.
|
||||
|
||||
|
||||
Design of the [float](../design.md) is critically influenced by the decision of where TLS connections to the node should
|
||||
be terminated.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
### 1. Terminate TLS on Firewall
|
||||
|
||||
|
||||
|
||||
#### Advantages
|
||||
|
||||
1. Common practice for DMZ web solutions, often with an HSM associated with the Firewall and should be familiar for banks to setup.
|
||||
@ -39,11 +32,8 @@ Design of the [float](../design.md) is critically influenced by the decision of
|
||||
##### Disadvantages
|
||||
|
||||
1. More work than the do-nothing approach
|
||||
|
||||
2. More protocol to design for sending across the inner firewall.
|
||||
|
||||
|
||||
|
||||
### 2. Direct TLS Termination onto Float
|
||||
|
||||
#### Advantages
|
||||
@ -96,8 +86,6 @@ Design of the [float](../design.md) is critically influenced by the decision of
|
||||
|
||||
Proceed with Variant option 1a: Terminate on firewall; include SASL connection checking.
|
||||
|
||||
|
||||
|
||||
## Decision taken
|
||||
|
||||
[DRB Meeting, 16/11/2017](./drb-meeting-20171116.md): Proceed with option 2b - Terminate on float, inject key from internal portion of the float (RGB, JC, MH agreed)
|
||||
|
@ -1,32 +1,14 @@
|
||||

|
||||
|
||||
# Float Design
|
||||
|
||||
--------------------------------------------
|
||||
DOCUMENT MANAGEMENT
|
||||
============================================
|
||||
|
||||
## Document Control
|
||||
|
||||
* Title: Float Design
|
||||
* Date: 13th November 2017
|
||||
* Author: Matthew Nesbit
|
||||
* Distribution: Design Review Board, Product Management, Services - Technical (Consulting), Platform Delivery
|
||||
* Corda target version: Enterprise
|
||||
|
||||
## Document Sign-off
|
||||
|
||||
* Author: David Lee
|
||||
* Reviewers(s): TBD
|
||||
* Final approver(s): TBD
|
||||
|
||||
## Document History
|
||||
|
||||
# HIGH LEVEL DESIGN
|
||||
.. important:: This design document describes a feature of Corda Enterprise.
|
||||
|
||||
## Overview
|
||||
|
||||
The role of the 'float' is to meet the requirements of organisations that will not allow direct incoming connections to their node, but would rather host a proxy component in a DMZ to achieve this. As such it needs to meet the requirements of modern DMZ security rules, which essentially assume that the entire machine in the DMZ may become compromised. At the same time, we expect that the Float can interoperate with directly connected nodes, possibly even those using open source Corda.
|
||||
The role of the 'float' is to meet the requirements of organisations that will not allow direct incoming connections to
|
||||
their node, but would rather host a proxy component in a DMZ to achieve this. As such it needs to meet the requirements
|
||||
of modern DMZ security rules, which essentially assume that the entire machine in the DMZ may become compromised. At
|
||||
the same time, we expect that the Float can interoperate with directly connected nodes, possibly even those using open
|
||||
source Corda.
|
||||
|
||||
### Background
|
||||
|
||||
@ -36,7 +18,8 @@ The diagram below illustrates the current mechanism for peer-to-peer messaging b
|
||||
|
||||

|
||||
|
||||
When a flow running on a Corda node triggers a requirement to send a message to a peer node, it first checks for pre-existence of an applicable message queue for that peer.
|
||||
When a flow running on a Corda node triggers a requirement to send a message to a peer node, it first checks for
|
||||
pre-existence of an applicable message queue for that peer.
|
||||
|
||||
**If the relevant queue exists:**
|
||||
|
||||
@ -69,49 +52,75 @@ Allow connectivity in compliance with DMZ constraints commonly imposed by modern
|
||||
2. Data passing from the internet and the internal network via the DMZ should pass through a clear protocol break in the DMZ.
|
||||
3. Only identified IPs and ports are permitted to access devices in the DMZ; this includes communications between devices colocated in the DMZ.
|
||||
4. Only a limited number of ports are opened in the firewall (<5) to make firewall operation manageable. These ports must change slowly.
|
||||
5. Any DMZ machine is typically multi-homed, with separate network cards handling traffic through the institutional firewall vs. to the Internet. (There is usually a further hidden management interface card accessed via a jump box for managing the box and shipping audit trail information). This requires that our software can bind listening ports to the correct network card not just to 0.0.0.0.
|
||||
6. No connections to be initiated by DMZ devices towards the internal network. Communications should be initiated from the internal network to form a bidirectional channel with the proxy process.
|
||||
5. Any DMZ machine is typically multi-homed, with separate network cards handling traffic through the institutional
|
||||
firewall vs. to the Internet. (There is usually a further hidden management interface card accessed via a jump box for
|
||||
managing the box and shipping audit trail information). This requires that our software can bind listening ports to the
|
||||
correct network card, not just to 0.0.0.0 (a binding sketch follows this list).
|
||||
6. No connections to be initiated by DMZ devices towards the internal network. Communications should be initiated from
|
||||
the internal network to form a bidirectional channel with the proxy process.
|
||||
7. No business data should be persisted on the DMZ box.
|
||||
8. An audit log of all connection events is required to track breaches. Latency information should also be tracked to facilitate management of connectivity issues.
|
||||
9. Processes on DMZ devices run as local accounts with no relationship to internal permission systems, or ability to enumerate devices on the internal network.
|
||||
8. An audit log of all connection events is required to track breaches. Latency information should also be tracked to
|
||||
facilitate management of connectivity issues.
|
||||
9. Processes on DMZ devices run as local accounts with no relationship to internal permission systems, or ability to
|
||||
enumerate devices on the internal network.
|
||||
10. Communications in the DMZ should use modern TLS, often with local-only certificates/keys that hold no value outside of use in predefined links.
|
||||
11. Where TLS is required to terminate on the firewall, provide a suitably secure key management mechanism (e.g. an HSM).
|
||||
12. Any proxy in the DMZ should be subject to the same HA requirements as the devices it is servicing
|
||||
13. Any business data passing through the proxy should be separately encrypted, so that no data is in the clear of the program memory if the DMZ box is compromised.
|
||||
13. Any business data passing through the proxy should be separately encrypted, so that no data is in the clear of the
|
||||
program memory if the DMZ box is compromised.
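
Requirement 5 above implies that any listener we run in the DMZ must bind explicitly to the correct network card. As a rough illustration only (the address, port and empty handler below are hypothetical and not part of this design), a netty server can be bound to a specific interface like this:

```kotlin
import io.netty.bootstrap.ServerBootstrap
import io.netty.channel.ChannelInitializer
import io.netty.channel.nio.NioEventLoopGroup
import io.netty.channel.socket.SocketChannel
import io.netty.channel.socket.nio.NioServerSocketChannel
import java.net.InetSocketAddress

fun main() {
    val bossGroup = NioEventLoopGroup(1)
    val workerGroup = NioEventLoopGroup()
    try {
        val bootstrap = ServerBootstrap()
            .group(bossGroup, workerGroup)
            .channel(NioServerSocketChannel::class.java)
            .childHandler(object : ChannelInitializer<SocketChannel>() {
                override fun initChannel(ch: SocketChannel) {
                    // TLS and AMQP handlers would be added to ch.pipeline() here.
                }
            })
        // Bind to the DMZ-facing interface only (hypothetical address/port), never to 0.0.0.0.
        val channel = bootstrap.bind(InetSocketAddress("10.155.0.5", 10005)).sync().channel()
        channel.closeFuture().sync()
    } finally {
        bossGroup.shutdownGracefully()
        workerGroup.shutdownGracefully()
    }
}
```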
|
||||
|
||||
## Design Decisions
|
||||
|
||||
The following design decisions are assumed by this design:
|
||||
The following design decisions fed into this design:
|
||||
|
||||
1. [AMQP vs. custom P2P](./decisions/p2p-protocol.md): Use AMQP
|
||||
2. [SSL termination (firewall vs. float)](./decisions/ssl-termination.md): Terminate on firewall; include SASL connection checking
|
||||
3. [End-to-end encryption](./decisions/e2e-encryption.md): Include placeholder only
|
||||
4. [Prioritisation of pluggable broker support](./decisions/pluggable-broker.md): Defer pluggable brokers until later
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
decisions/p2p-protocol.md
|
||||
decisions/ssl-termination.md
|
||||
decisions/e2e-encryption.md
|
||||
decisions/pluggable-broker.md
|
||||
|
||||
## Target Solution
|
||||
|
||||
The proposed solution introduces a reverse proxy component ("**float**") which may be sited in the DMZ, as illustrated in the diagram below.
|
||||
The proposed solution introduces a reverse proxy component ("**float**") which may be sited in the DMZ, as illustrated
|
||||
in the diagram below.
|
||||
|
||||

|
||||
|
||||
The main role of the float is to forward incoming AMQP link packets from authenticated TLS links to the AMQP Bridge Manager, then echo back final delivery acknowledgements once the Bridge Manager has successfully inserted the messages. The Bridge Manager is responsible for rejecting inbound packets on queues that are not local inboxes to prevent e.g. 'cheating' messages onto management topics, faking outgoing messages etc.
|
||||
The main role of the float is to forward incoming AMQP link packets from authenticated TLS links to the AMQP Bridge
|
||||
Manager, then echo back final delivery acknowledgements once the Bridge Manager has successfully inserted the messages.
|
||||
The Bridge Manager is responsible for rejecting inbound packets on queues that are not local inboxes to prevent e.g.
|
||||
'cheating' messages onto management topics, faking outgoing messages etc.
|
||||
|
||||
The float is linked to the internal AMQP Bridge Manager via a single AMQP/TLS connection, which can contain multiple logical AMQP links. This link is initiated at the socket level by the Bridge Manager towards the float.
|
||||
The float is linked to the internal AMQP Bridge Manager via a single AMQP/TLS connection, which can contain multiple
|
||||
logical AMQP links. This link is initiated at the socket level by the Bridge Manager towards the float.
|
||||
|
||||
The float is a **listener only** and does not enable outgoing bridges (see Design Decisions, above). Outgoing bridge formation and message sending come directly from the internal Bridge Manager (possibly via a SOCKS 4/5 proxy, which is easy enough to enable in netty, or directly through the corporate firewall. Initiating from the float gives rise to security concerns.)
|
||||
The float is a **listener only** and does not enable outgoing bridges (see Design Decisions, above). Outgoing bridge
formation and message sending come directly from the internal Bridge Manager, possibly via a SOCKS 4/5 proxy (which is
easy enough to enable in netty) or directly through the corporate firewall; initiating from the float would give rise to
security concerns.
|
||||
|
||||
The float is **not mandatory**; interoperability with older nodes, even those using direct AMQP from bridges in the node, is supported.
|
||||
The float is **not mandatory**; interoperability with older nodes, even those using direct AMQP from bridges in the
|
||||
node, is supported.
|
||||
|
||||
**No state will be serialized on the float**, although suitably protected logs will be recorded of all float activities.
|
||||
|
||||
**End-to-end encryption** of the payload is not delivered through this design (see Design Decisions, above). For current purposes, a header field indicating plaintext/encrypted payload is employed as a placeholder.
|
||||
**End-to-end encryption** of the payload is not delivered through this design (see Design Decisions, above). For current
|
||||
purposes, a header field indicating plaintext/encrypted payload is employed as a placeholder.
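
As a rough sketch of that placeholder (all names below are assumptions for illustration, not the actual wire format), the forwarded envelope could simply carry a boolean header alongside the opaque payload:

```kotlin
// Hypothetical sketch of the plaintext/encrypted payload placeholder described above.
data class FloatForwardedMessage(
    val targetQueue: String,        // validated inbox name on the internal broker
    val sourceLegalName: String,    // X500 name taken from the authenticated TLS channel
    val payloadEncrypted: Boolean,  // placeholder flag: false until e2e encryption is designed
    val payload: ByteArray          // opaque AMQP message body
)
```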
|
||||
|
||||
**HA** is enabled (this should be easy as the bridge manager can choose which float to make active). Only fully connected DMZ floats should activate their listening port.
|
||||
**HA** is enabled (this should be easy as the bridge manager can choose which float to make active). Only fully
|
||||
connected DMZ floats should activate their listening port.
|
||||
|
||||
Implementation of the float is expected to be based on existing AMQP Bridge Manager code - see Implementation Plan, below, for expected work stages.
|
||||
Implementation of the float is expected to be based on existing AMQP Bridge Manager code - see Implementation Plan,
|
||||
below, for expected work stages.
|
||||
|
||||
### Bridge control protocol
|
||||
The bridge control is designed to be as stateless as possible. Thus, nodes and bridges restarting must re-request/broadcast information to each other. Messages are sent to a 'bridge.control' address in Artemis as non-persistent messages with a non-durable queue. Each message should contain a duplicate message ID, which is also re-used as the correlation id in replies. Relevant scenarios are described below:
|
||||
|
||||
The bridge control is designed to be as stateless as possible. Thus, nodes and bridges restarting must
|
||||
re-request/broadcast information to each other. Messages are sent to a 'bridge.control' address in Artemis as
|
||||
non-persistent messages with a non-durable queue. Each message should contain a duplicate message ID, which is also
|
||||
re-used as the correlation id in replies. Relevant scenarios are described below:
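
(For illustration only, the sketch below shows one hypothetical Kotlin shape for such control messages - the names and fields are assumptions, not the defined protocol - before the scenario walk-throughs that follow.)

```kotlin
import java.util.UUID

// Every control message carries a duplicate-message ID; replies echo it back as their correlation ID.
sealed class BridgeControlMessage {
    abstract val messageId: UUID
}

// Sent by a bridge on start-up or reconnection to ask which queues it should service.
data class QueueSnapshotRequest(
    override val messageId: UUID = UUID.randomUUID()
) : BridgeControlMessage()

// Reply from the node; correlationId repeats the requester's messageId.
data class QueueSnapshotResponse(
    override val messageId: UUID,
    val correlationId: UUID,
    val inboxQueues: List<String>,
    val outboundQueues: List<String>
) : BridgeControlMessage()
```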
|
||||
|
||||
#### On bridge start-up, or reconnection to Artemis
|
||||
1. The bridge process should subscribe to the 'bridge.control' address.
|
||||
@ -137,60 +146,111 @@ The bridge control is designed to be as stateless as possible. Thus, nodes and b
|
||||
5. Future QueueSnapshot requests should be responded to with the new queue included in the list.
|
||||
|
||||
### Behaviour with a Float portion in the DMZ
|
||||
1. On initial connection of an inbound bridge, AMQP is configured to run a SASL challenge response to (re-)validate the origin and confirm the client identity. (The most likely SASL mechanism for this is using https://tools.ietf.org/html/rfc3163 as this allows reuse of our PKI certificates in the challenge response. Potentially we could forward some bridge control messages to cover the SASL exchange to the internal Bridge Controller. This would allow us to keep the private keys internal to the organisation, so we may also require a SASLAuth message type as part of the bridge control protocol.)
|
||||
2. The float restricts acceptable AMQP topics to the name space appropriate for inbound messages only. Hence, there should be no way to tunnel messages to bridge control, or RPC topics on the bus.
|
||||
3. On receipt of a message from the external network, the Float should append a header to link the source channel's X500 name, then create a Delivery for forwarding the message inwards.
|
||||
4. The internal Bridge Control Manager process validates the message further to ensure that it is targeted at a legitimate inbox (i.e. not an outbound queue) and then forwards it to the bus. Once delivered to the broker, the Delivery acknowledgements are cascaded back.
|
||||
|
||||
1. On initial connection of an inbound bridge, AMQP is configured to run a SASL challenge response to (re-)validate the
|
||||
origin and confirm the client identity. (The most likely SASL mechanism for this is using https://tools.ietf.org/html/rfc3163
|
||||
as this allows reuse of our PKI certificates in the challenge response. Potentially we could forward some bridge control
|
||||
messages to cover the SASL exchange to the internal Bridge Controller. This would allow us to keep the private keys
|
||||
internal to the organisation, so we may also require a SASLAuth message type as part of the bridge control protocol.)
|
||||
2. The float restricts acceptable AMQP topics to the name space appropriate for inbound messages only. Hence, there
|
||||
should be no way to tunnel messages to bridge control, or RPC topics on the bus.
|
||||
3. On receipt of a message from the external network, the Float should append a header to link the source channel's X500
|
||||
name, then create a Delivery for forwarding the message inwards.
|
||||
4. The internal Bridge Control Manager process validates the message further to ensure that it is targeted at a legitimate
|
||||
inbox (i.e. not an outbound queue) and then forwards it to the bus. Once delivered to the broker, the Delivery
|
||||
acknowledgements are cascaded back.
|
||||
5. On receiving Delivery notification from the internal side, the Float acknowledges back the correlated original Delivery.
|
||||
6. The Float should protect against excessive inbound messages by AMQP flow control and refusing to accept excessive unacknowledged deliveries.
|
||||
7. The Float only exposes its inbound server socket when activated by a valid AMQP link from the Bridge Control Manager to allow for a simple HA pool of DMZ Float processes. (Floats cannot run hot-hot as this would invalidate Corda's message ordering guarantees.)
|
||||
7. The Float only exposes its inbound server socket when activated by a valid AMQP link from the Bridge Control Manager
|
||||
to allow for a simple HA pool of DMZ Float processes. (Floats cannot run hot-hot as this would invalidate Corda's
|
||||
message ordering guarantees.)
|
||||
|
||||
## Implementation plan
|
||||
|
||||
### Proposed incremental steps towards a float
|
||||
|
||||
# IMPLEMENTATION PLAN
|
||||
1. First, I would like to more explicitly split the RPC and P2P MessagingService instances inside the Node. They can
|
||||
keep the same interface, but this would let us develop P2P and RPC at different rates if required.
|
||||
|
||||
2. The current in-node design with Artemis Core bridges should first be replaced with an equivalent piece of code that
|
||||
initiates send only bridges using an in-house wrapper over the proton-j library. Thus, the current Artemis message
|
||||
objects will be picked up from existing queues using the CORE protocol via an abstraction interface to allow later
|
||||
pluggable replacement. The specific subscribed queues are controlled as before and bridges started by the existing code
|
||||
path. The only difference is the bridges will be the new AMQP client code. The remote Artemis broker should accept
|
||||
transferred packets directly onto its own inbox queue and acknowledge receipt via standard AMQP Delivery notifications.
|
||||
This in turn will be acknowledged back to the Artemis Subscriber to permanently remove the message from the source
|
||||
Artemis queue. The headers for deduplication, address names, etc will need to be mapped to the AMQP messages and we will
|
||||
have to take care about the message payload. This should be an envelope that is capable in the future of being
|
||||
end-to-end encrypted. Where possible we should stay close to the current Artemis mappings.
|
||||
|
||||
## Proposed Incremental Steps Towards a Float
|
||||
1. First, I would like to more explicitly split the RPC and P2P MessagingService instances inside the Node. They can keep the same interface, but this would let us develop P2P and RPC at different rates if required.
|
||||
2. The current in-node design with Artemis Core bridges should first be replaced with an equivalent piece of code that initiates send only bridges using an in-house wrapper over the proton-j library. Thus, the current Artemis message objects will be picked up from existing queues using the CORE protocol via an abstraction interface to allow later pluggable replacement. The specific subscribed queues are controlled as before and bridges started by the existing code path. The only difference is the bridges will be the new AMQP client code. The remote Artemis broker should accept transferred packets directly onto its own inbox queue and acknowledge receipt via standard AMQP Delivery notifications. This in turn will be acknowledged back to the Artemis Subscriber to permanently remove the message from the source Artemis queue. The headers for deduplication, address names, etc will need to be mapped to the AMQP messages and we will have to take care about the message payload. This should be an envelope that is capable in the future of being end-to-end encrypted. Where possible we should stay close to the current Artemis mappings.
|
||||
3. We need to define a bridge control protocol, so that we can have an out of process float/bridge. The current process is that on message send the node checks the target address to see if the target queue already exists. If the queue doesn't exist it creates a new queue which includes an encoding of the PublicKey in its name. This is picked up by a wrapper around the Artemis Server which is also hosted inside the node and can ask the network map cache for a translation to a target host and port. This in turn allows a new bridge to be provisioned. At node restart the re-population of the network map cache is followed to re-create the bridges to any unsent queues/messages.
|
||||
4. My proposal for a bridge control protocol is partly influenced by the fact that AMQP does not have a built-in mechanism for queue creation/deletion/enumeration. Also, the flows cannot progress until they are sure that there is an accepting queue. Finally, if one runs a local broker it should be fine to run multiple nodes without any bridge processes. Therefore, I will leave the queue creation as the node's responsibility. Initially we can continue to use the existing CORE protocol for this. The requirement to initiate a bridge will change from being implicit signalling via server queue detection to being an explicit pub-sub message that requests bridge formation. This doesn't need durability, or acknowledgements, because when a bridge process starts it should request a refresh of the required bridge list. The typical create bridge messages should contain:
|
||||
1. The queue name (ideally with the sha256 of the PublicKey, not the whole PublicKey as that may not work on brokers with queue name length constraints).
|
||||
2. The expected X500Name for the remote TLS certificate.
|
||||
3. The list of host and ports to attempt connection to. See separate section for more info.
|
||||
5. Once we have the bridge protocol in place and a bridge out of process the broker can move out of process too, which is a requirement for clustering anyway. We can then start work on floating the bridge and making our broker pluggable.
|
||||
1. At this point the bridge connection to the local queues should be upgraded to also be AMQP client, rather than CORE protocol, which will give the ability for the P2P bridges to work with other broker products.
|
||||
2. An independent task is to look at making the Bridge process HA, probably using a similar hot-warm mastering solution as the node, or atomix.io. The inactive node should track the control messages, but obviously doesn't initiate any bridges.
|
||||
3. Another potentially parallel piece of development is to start to build a float, which is essentially just splitting the bridge in two and putting in an intermediate hop AMQP/TLS link. The thin proxy in the DMZ zone should be as stateless as possible in this.
|
||||
4. Finally, the node should use AMQP to talk to its local broker cluster, but this will have to remain partly tied to Artemis, as queue creation will require sending management messages to the Artemis core, but we should be able to abstract this. Bridge Management Protocol.
|
||||
3. We need to define a bridge control protocol, so that we can have an out of process float/bridge. The current process
|
||||
is that on message send the node checks the target address to see if the target queue already exists. If the queue
|
||||
doesn't exist it creates a new queue which includes an encoding of the PublicKey in its name. This is picked up by a
|
||||
wrapper around the Artemis Server which is also hosted inside the node and can ask the network map cache for a
|
||||
translation to a target host and port. This in turn allows a new bridge to be provisioned. At node restart the
|
||||
re-population of the network map cache is followed to re-create the bridges to any unsent queues/messages.
|
||||
|
||||
## Float evolution
|
||||
4. My proposal for a bridge control protocol is partly influenced by the fact that AMQP does not have a built-in
|
||||
mechanism for queue creation/deletion/enumeration. Also, the flows cannot progress until they are sure that there is an
|
||||
accepting queue. Finally, if one runs a local broker it should be fine to run multiple nodes without any bridge
|
||||
processes. Therefore, I will leave the queue creation as the node's responsibility. Initially we can continue to use the
|
||||
existing CORE protocol for this. The requirement to initiate a bridge will change from being implicit signalling via
|
||||
server queue detection to being an explicit pub-sub message that requests bridge formation. This doesn't need
|
||||
durability, or acknowledgements, because when a bridge process starts it should request a refresh of the required bridge
|
||||
list. The typical create bridge messages should contain (a sketch follows this list):
|
||||
|
||||
1. The queue name (ideally with the sha256 of the PublicKey, not the whole PublicKey as that may not work on brokers with queue name length constraints).
|
||||
2. The expected X500Name for the remote TLS certificate.
|
||||
3. The list of host and ports to attempt connection to. See separate section for more info.
|
||||
|
||||
5. Once we have the bridge protocol in place and a bridge out of process the broker can move out of process too, which
|
||||
is a requirement for clustering anyway. We can then start work on floating the bridge and making our broker pluggable.
|
||||
|
||||
1. At this point the bridge connection to the local queues should be upgraded to also be AMQP client, rather than CORE
|
||||
protocol, which will give the ability for the P2P bridges to work with other broker products.
|
||||
2. An independent task is to look at making the Bridge process HA, probably using a similar hot-warm mastering solution
|
||||
as the node, or atomix.io. The inactive node should track the control messages, but obviously doesn't initiate any
|
||||
bridges.
|
||||
3. Another potentially parallel piece of development is to start to build a float, which is essentially just splitting
|
||||
the bridge in two and putting in an intermediate hop AMQP/TLS link. The thin proxy in the DMZ zone should be as
|
||||
stateless as possible in this.
|
||||
4. Finally, the node should use AMQP to talk to its local broker cluster, but this will have to remain partly tied
|
||||
to Artemis, as queue creation will require sending management messages to the Artemis core, but we should be
|
||||
able to abstract this.
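
The create-bridge message from step 4 might be shaped roughly as below; the field and function names are assumptions for illustration, not the actual protocol definition:

```kotlin
import java.security.MessageDigest
import java.security.PublicKey

// Hypothetical shape of the 'create bridge' control message described in step 4.
data class BridgeRequest(
    val queueName: String,                // embeds the sha-256 of the PublicKey, not the key itself
    val expectedRemoteLegalName: String,  // X500 name expected on the remote TLS certificate
    val targetEndpoints: List<String>     // host:port candidates to attempt, in order
)

// Queue names embed a digest rather than the full key to respect broker queue-name length limits.
fun hashedQueueSuffix(key: PublicKey): String =
    MessageDigest.getInstance("SHA-256").digest(key.encoded).joinToString("") { "%02x".format(it) }
```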
|
||||
|
||||
### Float evolution
|
||||
|
||||
#### In-Process AMQP Bridging
|
||||
|
||||
### In-Process AMQP Bridging
|
||||

|
||||
|
||||
1. In this phase of evolution we hook the same bridge creation code as before and use the same in-process data access to network map cache.
|
||||
2. However, we now implement AMQP sender clients using proton-j and netty for TLS layer and connection retry.
|
||||
3. This will also involve formalising the AMQP packet format of the Corda P2P protocol.
|
||||
4. Once a bridge makes a successful link to a remote node's Artemis broker it will subscribe to the associated local queue.
|
||||
5. The messages will be picked up from the local broker via an Artemis CORE consumer for simplicity of initial implementation.
|
||||
6. The queue consumer should be implemented with a simple generic interface as façade, to allow future replacement.
|
||||
7. The message will be sent across the AMQP protocol directly to the remote Artemis broker.
|
||||
8. Once acknowledgement of receipt is given with an AMQP Delivery notification the queue consumption will be acknowledged.
|
||||
9. This will remove the original item from the source queue.
|
||||
10. If delivery fails due to link loss the subscriber should be closed until a new link is established to ensure messages are not consumed.
|
||||
11. If delivery fails for other reasons there should be some form of periodic retry over the AMQP link.
|
||||
12. For authentication checks the client cert returned from the remote server will be checked and the link dropped if it doesn't match expectations.
|
||||
In this phase of evolution we hook the same bridge creation code as before and use the same in-process data access to
|
||||
network map cache. However, we now implement AMQP sender clients using proton-j and netty for TLS layer and connection
|
||||
retry. This will also involve formalising the AMQP packet format of the Corda P2P protocol. Once a bridge makes a
|
||||
successful link to a remote node's Artemis broker it will subscribe to the associated local queue. The messages will be
|
||||
picked up from the local broker via an Artemis CORE consumer for simplicity of initial implementation. The queue
|
||||
consumer should be implemented with a simple generic interface as façade, to allow future replacement. The message will
|
||||
be sent across the AMQP protocol directly to the remote Artemis broker. Once acknowledgement of receipt is given with an
|
||||
AMQP Delivery notification the queue consumption will be acknowledged. This will remove the original item from the
|
||||
source queue. If delivery fails due to link loss the subscriber should be closed until a new link is established to
|
||||
ensure messages are not consumed. If delivery fails for other reasons there should be some form of periodic retry over
|
||||
the AMQP link. For authentication checks the client cert returned from the remote server will be checked and the link
|
||||
dropped if it doesn't match expectations.
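
One possible shape for that façade (illustrative only; the real interface would be defined during implementation) is a minimal consumer abstraction that hides whether Artemis CORE or AMQP sits underneath:

```kotlin
// Hypothetical façade over the local queue consumer, so the initial Artemis CORE implementation
// can later be swapped for an AMQP client without touching the bridge logic.
interface LocalQueueConsumer : AutoCloseable {
    /** Register a callback invoked for each message pulled from the subscribed local queue. */
    fun subscribe(queueName: String, onMessage: (ReceivedMessage) -> Unit)

    /** Acknowledge a message only after the remote AMQP Delivery notification has arrived. */
    fun acknowledge(message: ReceivedMessage)
}

// Minimal message view needed by the bridge; fields are assumptions for illustration.
data class ReceivedMessage(
    val queueName: String,
    val headers: Map<String, Any?>,
    val body: ByteArray
)
```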
|
||||
|
||||
### Out of process Artemis Broker and Bridges
|
||||
#### Out of process Artemis Broker and Bridges
|
||||

|
||||
|
||||
1. Move the Artemis broker and bridge formation logic out of the node. This requires formalising the bridge creation requests, but allows clustered brokers, standardised AMQP usage and ultimately pluggable brokers.
|
||||
2. We should implement a netty socket server on the bridge and forward authenticated packets to the local Artemis broker inbound queues. An AMQP server socket is required for the float, although it should be transparent whether a NodeInfo refers to a bridge socket address, or an Artemis broker.
|
||||
3. The queue names should use the sha-256 of the PublicKey not the full key. Also, the name should be used for in and out queues, so that multiple distinct nodes can coexist on the same broker. This will simplify development as developers just run a background broker and shouldn't need to restart it.
|
||||
4. To export the network map information and to initiate bridges a non-durable bridge control protocol will be needed (in blue). Essentially the messages declare the local queue names and target TLS link information. For in-bound messages only messages for known inbox targets will be acknowledged.
|
||||
5. It should not be hard to make the bridges active-passive HA as they contain no persisted message state and simple RPC can resync the state of the bridge.
|
||||
6. Queue creation will remain with the node as this must use non-AMQP mechanisms and because flows should be able to queue sent messages even if the bridge is temporarily down.
|
||||
7. In parallel work can start to upgrade the local links to Artemis (i.e. the node-Artemis link and the Bridge Manager-Artemis link) to be AMQP clients as much as possible.
|
||||
|
||||
### Full float implementation
|
||||
As described in the 'Target Solution' section, above.
|
||||
Move the Artemis broker and bridge formation logic out of the node. This requires formalising the bridge creation
|
||||
requests, but allows clustered brokers, standardised AMQP usage and ultimately pluggable brokers. We should implement a
|
||||
netty socket server on the bridge and forward authenticated packets to the local Artemis broker inbound queues. An AMQP
|
||||
server socket is required for the float, although it should be transparent whether a NodeInfo refers to a bridge socket
|
||||
address, or an Artemis broker. The queue names should use the sha-256 of the PublicKey not the full key. Also, the name
|
||||
should be used for in and out queues, so that multiple distinct nodes can coexist on the same broker. This will simplify
|
||||
development as developers just run a background broker and shouldn't need to restart it. To export the network map
|
||||
information and to initiate bridges a non-durable bridge control protocol will be needed (in blue). Essentially the
|
||||
messages declare the local queue names and target TLS link information. For in-bound messages only messages for known
|
||||
inbox targets will be acknowledged. It should not be hard to make the bridges active-passive HA as they contain no
|
||||
persisted message state and simple RPC can resync the state of the bridge. Queue creation will remain with the node as
|
||||
this must use non-AMQP mechanisms and because flows should be able to queue sent messages even if the bridge is
|
||||
temporarily down. In parallel work can start to upgrade the local links to Artemis (i.e. the node-Artemis link and the
|
||||
Bridge Manager-Artemis link) to be AMQP clients as much as possible.
|
||||
|
@ -1,14 +1,8 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Node starting & stopping
|
||||
============================================
|
||||
# Design Decision: Node starting & stopping
|
||||
|
||||
## Background / Context
|
||||
|
||||
The potential use of a crash shell is relevant to [high availability](../design.md) capabilities of nodes.
|
||||
|
||||
|
||||
The potential use of a crash shell is relevant to high availability capabilities of nodes.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
@ -49,4 +43,8 @@ Proceed with Option 2: Delegate to external tools
|
||||
|
||||
## Decision taken
|
||||
|
||||
**[DRB meeting, 16/11/2017:](./drb-meeting-20171116.md)** Restarts should be handled by polite shutdown, followed by a hard clear. (RGB, JC, MH agreed)
|
||||
Restarts should be handled by polite shutdown, followed by a hard clear. (RGB, JC, MH agreed)
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
@ -1,14 +1,9 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Message storage
|
||||
============================================
|
||||
# Design Decision: Message storage
|
||||
|
||||
## Background / Context
|
||||
|
||||
Storage of messages by the message broker has implications for replication technologies which can be used to ensure both [high availability](../design.md) and disaster recovery of Corda nodes.
|
||||
|
||||
|
||||
Storage of messages by the message broker has implications for replication technologies which can be used to ensure both
|
||||
[high availability](../design.md) and disaster recovery of Corda nodes.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
@ -44,4 +39,8 @@ Continue with Option 1: Storage in the file system
|
||||
|
||||
## Decision taken
|
||||
|
||||
[DRB meeting, 16/11/2017:](./drb-meeting-20171116.md) Use storage in the file system (for now)
|
||||
Use storage in the file system (for now)
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
@ -1,13 +1,7 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Review Board Meeting Minutes
|
||||
============================================
|
||||
# Design Review Board Meeting Minutes
|
||||
|
||||
**Date / Time:** 16/11/2017, 16:30
|
||||
|
||||
|
||||
|
||||
## Attendees
|
||||
|
||||
- Mark Oldfield (MO)
|
||||
@ -24,9 +18,7 @@ Design Review Board Meeting Minutes
|
||||
- Jonathan Sartin (JS)
|
||||
- David Lee (DL)
|
||||
|
||||
|
||||
|
||||
## **Minutes**
|
||||
## Minutes
|
||||
|
||||
The meeting re-opened following prior discussion of the float design.
|
||||
|
||||
|
@ -1,14 +1,9 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Broker separation
|
||||
============================================
|
||||
# Design Decision: Broker separation
|
||||
|
||||
## Background / Context
|
||||
|
||||
A decision of whether to extract the Artemis message broker as a separate component has implications for the design of [high availability](../design.md) for nodes.
|
||||
|
||||
|
||||
A decision of whether to extract the Artemis message broker as a separate component has implications for the design of
|
||||
[high availability](../design.md) for nodes.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
@ -16,15 +11,15 @@ A decision of whether to extract the Artemis message broker as a separate compon
|
||||
|
||||
#### Advantages
|
||||
|
||||
1. Least change
|
||||
1. Least change
|
||||
|
||||
#### Disadvantages
|
||||
|
||||
1. Means that starting/stopping Corda is tightly coupled to starting/stopping Artemis instances.
|
||||
2. Risks resource leaks from one system component affecting other components.
|
||||
3. Not pluggable if we wish to have an alternative broker.
|
||||
1. Means that starting/stopping Corda is tightly coupled to starting/stopping Artemis instances.
|
||||
2. Risks resource leaks from one system component affecting other components.
|
||||
3. Not pluggable if we wish to have an alternative broker.
|
||||
|
||||
## 2. External broker
|
||||
### 2. External broker
|
||||
|
||||
#### Advantages
|
||||
|
||||
@ -46,4 +41,8 @@ Proceed with Option 2: External broker
|
||||
|
||||
## Decision taken
|
||||
|
||||
**[DRB meeting, 16/11/2017:](./drb-meeting-20171116.md)** The broker should only be separated if required by other features (e.g. the float), otherwise not. (RGB, JC, MH agreed).
|
||||
The broker should only be separated if required by other features (e.g. the float), otherwise not. (RGB, JC, MH agreed).
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
@ -1,14 +1,8 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: IP addressing mechanism (near-term)
|
||||
============================================
|
||||
# Design Decision: IP addressing mechanism (near-term)
|
||||
|
||||
## Background / Context
|
||||
|
||||
End-to-end encryption is a desirable potential design feature for the [float](../design.md).
|
||||
|
||||
|
||||
End-to-end encryption is a desirable potential design feature for the [high availability support](design).
|
||||
|
||||
## Options Analysis
|
||||
|
||||
@ -45,4 +39,8 @@ Proceed with Option 1: Via Load Balancer
|
||||
|
||||
## Decision taken
|
||||
|
||||
**[DRB meeting, 16/11/2017:](./drb-meeting-20171116.md)** The design can allow for optional load balancers to be implemented by clients. (RGB, JC, MH agreed)
|
||||
The design can allow for optional load balancers to be implemented by clients. (RGB, JC, MH agreed)
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
@ -1,18 +1,14 @@
|
||||

|
||||
|
||||
------
|
||||
|
||||
# Design Decision: Medium-term target for node HA
|
||||
|
||||
## Background / Context
|
||||
|
||||
Designing for high availability is a complex task which can only be delivered over an operationally-significant timeline. It is therefore important to determine whether an intermediate state design (deliverable for around March 2018) is desirable as a precursor to longer term outcomes.
|
||||
|
||||
|
||||
Designing for high availability is a complex task which can only be delivered over an operationally-significant
|
||||
timeline. It is therefore important to determine whether an intermediate state design (deliverable for around March
|
||||
2018) is desirable as a precursor to longer term outcomes.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
### 1. Hot-warm as interim state (see [HA design doc](../design.md))
|
||||
### 1. Hot-warm as interim state
|
||||
|
||||
#### Advantages
|
||||
|
||||
@ -26,7 +22,7 @@ Designing for high availability is a complex task which can only be delivered ov
|
||||
2. May actually turn out more risky than hot-hot, because shutting down code is always prone to deadlocks and resource leakages.
|
||||
3. Some work would have to be thrown away when we create a full hot-hot solution.
|
||||
|
||||
### 2. Progress immediately to Hot-hot (see [HA design doc](../design.md))
|
||||
### 2. Progress immediately to Hot-hot
|
||||
|
||||
#### Advantages
|
||||
|
||||
@ -45,5 +41,9 @@ Proceed with Option 1: Hot-warm as interim state.
|
||||
|
||||
## Decision taken
|
||||
|
||||
**[DRB meeting, 16/11/2017:](./drb-meeting-20171116.md)** Adopt option 1: Medium-term target: Hot Warm (RGB, JC, MH agreed)
|
||||
Adopt option 1: Medium-term target: Hot Warm (RGB, JC, MH agreed)
|
||||
|
||||
.. toctree::
|
||||
|
||||
drb-meeting-20171116.md
|
||||
|
||||
|
@ -1,14 +1,10 @@



--------------------------------------------
Design Decision: Near-term target for node HA
============================================
# Design Decision: Near-term target for node HA

## Background / Context

Designing for high availability is a complex task which can only be delivered over an operationally-significant timeline. It is therefore important to determine the target state in the near term as a precursor to longer term outcomes.

## Options Analysis

@ -43,4 +39,8 @@ Proceed with Option 2: Hot-cold.

## Decision taken

**[DRB meeting, 16/11/2017:](./drb-meeting-20171116.md)** Adopt option 2: Near-term target: Hot Cold (RGB, JC, MH agreed)
Adopt option 2: Near-term target: Hot Cold (RGB, JC, MH agreed)

.. toctree::

   drb-meeting-20171116.md
@ -1,43 +1,30 @@



# High availability support

# High Availability Support for Corda: A Phased Approach

-------------------
DOCUMENT MANAGEMENT
===================

## Document Control

* High Availability and Disaster Recovery for Corda: A Phased Approach
* Date: 13th November 2017
* Author: Matthew Nesbit
* Distribution: Design Review Board, Product Management, Services - Technical (Consulting), Platform Delivery
* Corda target version: Enterprise

## Document Sign-off

* Author: David Lee
* Reviewer(s): TBD
* Final approver(s): TBD

## Document History

--------------------------------------------
HIGH LEVEL DESIGN
============================================
.. important:: This design document describes a feature of Corda Enterprise.

## Overview

### Background

The term high availability (HA) is used in this document to refer to the ability to rapidly handle any single component failure, whether due to physical issues (e.g. hard drive failure), network connectivity loss, or software faults.

Expectations of HA in modern enterprise systems are for systems to recover normal operation in a few minutes at most, while ensuring minimal/zero data loss. Whilst overall reliability is the overriding objective, it is desirable for Corda to offer HA mechanisms which are both highly automated and transparent to node operators. HA mechanisms must not involve any configuration changes that require more than an appropriate admin tool, or a simple start/stop of a process, as that would need an Emergency Change Request.

HA naturally grades into requirements for Disaster Recovery (DR), which requires that there is a tested procedure to handle large-scale multi-component failures, e.g. due to data centre flooding or acts of terrorism. DR processes are permitted to involve significant manual intervention, although the complications of actually invoking a Business Continuity Plan (BCP) mean that the less manual intervention required, the more competitive Corda will be in the modern vendor market. For modern financial institutions, maintaining comprehensive and effective BCP procedures is a legal requirement which is generally tested at least once a year.

However, until Corda is the system of record, or the primary system for transactions, we are unlikely to be required to have any kind of fully automatic DR. In fact, we are likely to be restarted only once BCP has restored the most critical systems. In contrast, typical financial institutions maintain large, complex technology landscapes in which individual component failures can occur, such as:

* Small scale software failures
* Mandatory data centre power cycles
@ -50,10 +37,11 @@ Thus, HA is essential for enterprise Corda and providing help to administrators

### Current node topology



The current solution has a single integrated process running in one JVM including Artemis, H2 database, Flow State Machine, P2P bridging. All storage is on the local file system. There is no HA capability other than manual restart of the node following failure.

#### Limitations
@ -70,60 +58,81 @@ Artemis, H2 database, Flow State Machine, P2P bridging. All storage is on the lo

## Requirements

### Goals

* A logical Corda node should continue to function in the event of an individual component failure or (e.g.) restart.
* No loss, corruption or duplication of data on the ledger due to component outages
* Ensure continuity of flows throughout any disruption
* Support software upgrades in a live network

### Goals (out of scope for this design document)
### Non-goals (out of scope for this design document)

* Be able to distribute a node over more than two datacenters.
* Be able to distribute a node between datacenters that are very far apart latency-wise (unless you don't care about performance).
* Be able to tolerate arbitrary byzantine failures within a node cluster.
* DR, specifically in the case of the complete failure of a site/datacentre/cluster or region, will require a different solution to that specified here. For now DR is only supported where performant synchronous replication is feasible, i.e. sites only a few miles apart.
## Timeline

This design document outlines a range of topologies which will be enabled through progressive enhancements from the short to long term.

On the timescales available for the current production pilot deployments we clearly do not have time to reach the ideal of a highly fault tolerant, horizontally scaled Corda.

Instead, I suggest that we can only achieve the simplest state of a standby Corda installation by January 5th, and even this is contingent on other enterprise features, such as external database and network map stabilisation, being completed on this timescale, plus any issues raised by testing.

For the March 31st timeline, I hope that we can achieve a more fully automatic node failover state, with the Artemis broker running as a cluster too. I include a diagram of a fully scaled Corda for completeness and so that I can discuss what work is re-usable/throw away.
For the Enterprise GA timeline, I hope that we can achieve a more fully automatic node failover state, with the Artemis broker running as a cluster too. I include a diagram of a fully scaled Corda for completeness and so that I can discuss what work is re-usable/throw away.

With regards to DR, it is unclear how this would work where synchronous replication is not feasible. At this point we can only investigate approaches as an aside to the main thrust of work for HA support. In the synchronous replication mode it is assumed that file and database replication can be used to ensure a cold DR backup.
## Design Decisions

The following design decisions are assumed by this design:

1. [Near-term-target](./decisions/near-term-target.md): Hot-Cold HA (see below)
2. [Medium-term target](./decisions/medium-term-target.md): Hot-Warm HA (see below)
3. [External broker](./decisions/external-broker.md): Yes
4. [Database message store](./decisions/db-msg-store.md): No
5. [IP addressing mechanism](./decisions/ip-addressing.md): Load balancer
6. [Crash shell start/stop](./decisions/crash-shell.md): No

.. toctree::
   :maxdepth: 1

   decisions/near-term-target.md
   decisions/medium-term-target.md
   decisions/external-broker.md
   decisions/db-msg-store.md
   decisions/ip-addressing.md
   decisions/crash-shell.md
## Target Solution

### Hot-Cold (minimum requirement)



Small-scale software failures on a node are recovered from locally by restarting/re-setting the offending component via the external (to the JVM) "Health Watchdog" (HW) process. The HW process (e.g. a shell script or similar) would monitor parameters for java processes by periodically querying them (sleep period of a few seconds). This may require introduction of a few monitoring 'hooks' into the Corda codebase or a "health" CorDapp the HW script can interface with. There would be back-off logic to prevent continuous restarts in the case of persistent failure.

We would provide a fully-functional sample HW script for Linux/Unix deployment platforms.
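As a rough illustration of the kind of check-and-restart loop such a watchdog would perform, a minimal sketch in Kotlin is shown below. The deliverable described above is a shell script, so this is only an approximation; the health endpoint, restart command and thresholds are hypothetical assumptions, not part of the design.

```kotlin
import java.net.HttpURLConnection
import java.net.URL

// Hypothetical health endpoint exposed by a "health" CorDapp or monitoring hook.
const val HEALTH_URL = "http://localhost:7005/health"
// Hypothetical command that restarts the node process (e.g. a systemd unit).
val RESTART_COMMAND = listOf("systemctl", "restart", "corda-node")

fun isHealthy(): Boolean = try {
    val conn = URL(HEALTH_URL).openConnection() as HttpURLConnection
    conn.connectTimeout = 2_000
    conn.readTimeout = 2_000
    conn.responseCode == 200
} catch (e: Exception) {
    false
}

fun main() {
    var consecutiveFailures = 0
    var backOffSeconds = 5L
    while (true) {
        if (isHealthy()) {
            consecutiveFailures = 0
            backOffSeconds = 5L
        } else if (++consecutiveFailures >= 3) {
            // Restart the offending process, then back off to avoid a restart loop.
            ProcessBuilder(RESTART_COMMAND).inheritIO().start().waitFor()
            consecutiveFailures = 0
            Thread.sleep(backOffSeconds * 1_000)
            backOffSeconds = minOf(backOffSeconds * 2, 300L)
        }
        Thread.sleep(5_000L)
    }
}
```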
The hot-cold design provides a backup VM and Corda deployment instance that can be manually started if the primary is stopped. The failed primary must be killed to ensure it is fully stopped.

For single-node deployment scenarios the simplest supported way to recover from failures is to re-start the entire set of Corda Node processes or reboot the node OS.

For a 2-node HA deployment scenario a load balancer determines which node is active and routes traffic to that node. The load balancer will need to monitor the health of the primary and secondary nodes and automatically route traffic from the public IP address to the only active end-point. An external solution is required for the load balancer and health monitor. In the case of Azure cloud deployments, no custom code needs to be developed to support the health monitor.

An additional component will be written to prevent accidental dual running, which is likely to make use of a database heartbeat table. Code size should be minimal.
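A minimal sketch of how such a heartbeat-based check could work at start-up is given below. The table name, columns, JDBC URL and timeout are illustrative assumptions only, not the agreed implementation; a background thread would refresh the heartbeat row every few seconds while the node runs.

```kotlin
import java.sql.DriverManager
import java.time.Duration
import java.time.Instant

// Illustrative start-up guard: refuse to start if another instance has heartbeated
// recently against the same database, otherwise claim (or refresh) the heartbeat row.
fun main() {
    val staleAfter = Duration.ofSeconds(30)
    val nodeId = "node-b" // hypothetical identifier for this instance

    DriverManager.getConnection("jdbc:sqlserver://dbhost;databaseName=corda", "corda", "secret").use { conn ->
        val rs = conn.createStatement().executeQuery("SELECT owner, beat_time FROM node_heartbeat WHERE id = 1")
        if (rs.next()) {
            val owner = rs.getString("owner")
            val lastBeat = rs.getTimestamp("beat_time").toInstant()
            if (owner != nodeId && Duration.between(lastBeat, Instant.now()) < staleAfter) {
                error("Another instance ($owner) appears to be running against this database - aborting start-up")
            }
            conn.prepareStatement("UPDATE node_heartbeat SET owner = ?, beat_time = CURRENT_TIMESTAMP WHERE id = 1")
                .apply { setString(1, nodeId) }.executeUpdate()
        } else {
            conn.prepareStatement("INSERT INTO node_heartbeat (id, owner, beat_time) VALUES (1, ?, CURRENT_TIMESTAMP)")
                .apply { setString(1, nodeId) }.executeUpdate()
        }
    }
}
```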
#### Advantages

@ -146,7 +155,7 @@ An additional component will be written to prevent accidental dual running which
- Health reporting and process controls need to be developed by the customer.

### Hot-Warm (Medium-term solution)



Hot-warm aims to automate failover and provide failover of individual major components e.g. Artemis.

@ -154,9 +163,10 @@ It involves two key changes to the hot-cold design:
1) Separation and clustering of the Artemis broker.
2) Start and stop of flow processing without JVM exit.

The consequences of these changes are that peer-to-peer bridging is separated from the node and a bridge control protocol must be developed. A leader election component is a pre-cursor to load balancing – likely to be a combination of custom code and standard library and, in the short term, is likely to be via the database. Cleaner handling of disconnects from the external components (Artemis and the database) will also be needed.
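As a sketch of what a database-backed leader election could look like, the lease-style approach below is one possibility. The table, column names and intervals are assumptions for illustration rather than the agreed design: the node that successfully refreshes the single lease row before it expires acts as master, while the other stays warm and keeps retrying.

```kotlin
import java.sql.Connection
import java.sql.Timestamp
import java.time.Instant

// Illustrative database lease: at most one node holds a non-expired lease row at a time.
fun tryAcquireOrRenewLease(conn: Connection, nodeId: String, leaseSeconds: Long): Boolean {
    val now = Instant.now()
    val updated = conn.prepareStatement(
        "UPDATE node_leader SET owner = ?, expires_at = ? WHERE owner = ? OR expires_at < ?"
    ).apply {
        setString(1, nodeId)
        setTimestamp(2, Timestamp.from(now.plusSeconds(leaseSeconds)))
        setString(3, nodeId)
        setTimestamp(4, Timestamp.from(now))
    }.executeUpdate()
    return updated == 1
}

// Called from a scheduled task, e.g. every leaseSeconds / 3:
//   val isMaster = tryAcquireOrRenewLease(conn, "node-a", 15)
//   if (isMaster) resumeFlowProcessing() else pauseFlowProcessing()  // hypothetical hooks
```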
#### Advantages

@ -175,13 +185,15 @@ Cleaner handling of disconnects from the external components (Artemis and the da
- No horizontal scaling support.
- Deployment of master and slave may not be completely symmetric.
- Care must be taken with upgrades to ensure master/slave election operates across updates.
- Artemis clustering does require a designated master at start-up of its cluster, hence any restart involving changing the primary node will require configuration management.
- The development effort is much more significant than the hot-cold configuration.
### Hot-Hot (Long-term strategic solution)



In this configuration, all nodes are actively processing work and share a clustered database. A mechanism for sharding or distributing the work load will need to be developed.

#### Advantages

@ -197,40 +209,76 @@ In this configuration, all nodes are actively processing work and share a cluste
- Will require handling of more states than just checkpoints e.g. soft locks and RPC subscriptions.
- Single flows will not be active on multiple nodes without future development work.
--------------------------------------------
IMPLEMENTATION PLAN
============================================
## Implementation plan

### Transitioning from Corda 2.0 to Manually Activated HA

The current Corda is built to run as a fully contained single process with the Flow logic, H2 database and Artemis broker all bundled together. This limits the options for automatic replication, or subsystem failure. Thus, we must use external mechanisms to replicate the data in the case of failure. We also should ensure that accidental dual start is not possible in case of mistakes, or slow shutdown of the primary.

Based on this situation, I suggest the following minimum development tasks are required for a tested HA deployment:

1. Complete and merge JDBC support for an external clustered database. Azure SQL Server has been identified as the most likely initial deployment. With this we should be able to point at an HA database instance for Ledger and Checkpoint data.
2. I am suggesting that for the near term we just use the Azure Load Balancer to hide the multiple machine addresses. This does require allowing a health monitoring link to the Artemis broker, but so far testing indicates that this operates without issue. Longer term we need to ensure that the network map and configuration support exists for the system to work with multiple TCP/IP endpoints advertised to external nodes. Ideally this should be rolled into the work for AMQP bridges and Floats.
3. Implement a very simple mutual exclusion feature, so that an enterprise node cannot start if another is running on the same database. This can be via a simple heartbeat update in the database, or possibly some other library. This feature should be enabled only when specified by configuration.
4. The replication of the Artemis Message Queues will have to be via an external mechanism. On Azure we believe that the only practical solution is the 'Azure Files' approach which maps a virtual Samba drive. This we are testing in case it is too slow to work. The mounting of separate Data Disks is possible, but they can only be mounted to one VM at a time, so they would not be compatible with the goal of no change requests for HA.
5. Improve health monitoring to better indicate faults. Extending the existing JMX and logging support should achieve this, although we probably need to create a watchdog CorDapp that verifies that the State Machine and Artemis messaging are able to process new work and to monitor flow latency.
6. Test the checkpointing mechanism and confirm that failures don't corrupt the data by deploying an HA setup on Azure and driving flows through the system as we stop the node randomly and switch to the other node. If this reveals any issues we will have to fix them.
7. Confirm that the behaviour of the RPC Client API is stable through these restarts, from the perspective of a stateless REST server calling through to RPC. The RPC API should provide positive feedback to the application, so that it can respond in a controlled fashion when disconnected (see the sketch after this list).
8. Work on flow hospital tools where needed
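To illustrate item 7, a minimal sketch of a client-side reconnection wrapper around the Corda RPC client is shown below. The retry policy, helper names and endpoint are illustrative assumptions against the Corda 3.x-era RPC API; the production approach may differ, and subscriptions must be re-registered by the caller after each reconnect.

```kotlin
import net.corda.client.rpc.CordaRPCClient
import net.corda.client.rpc.CordaRPCConnection
import net.corda.core.utilities.NetworkHostAndPort

// Illustrative reconnect loop: keep retrying until the (possibly failed-over) node
// accepts the connection again, so the calling REST server can degrade gracefully.
fun connectWithRetry(target: NetworkHostAndPort, user: String, password: String): CordaRPCConnection {
    while (true) {
        try {
            return CordaRPCClient(target).start(user, password)
        } catch (e: Exception) {
            // Node is down or failing over; back off and try again.
            Thread.sleep(5_000L)
        }
    }
}

fun main() {
    val connection = connectWithRetry(NetworkHostAndPort("node.example.com", 10006), "rpcUser", "rpcPass")
    val proxy = connection.proxy
    // Observables do not survive a disconnect, so a wrapper would re-subscribe here after each reconnect.
    val (snapshot, updates) = proxy.stateMachinesFeed()
    println("Flows in flight after (re)connect: ${snapshot.size}")
    updates.subscribe { println("State machine update: $it") }
}
```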
### Moving Towards Automatic Failover HA

To move towards more automatic failover handling we need to ensure that the node can be partially active, i.e. live monitoring the health status and perhaps keeping major data structures in sync for faster activation, but not actually processing flows. This needs to be reversible without leakage, or destabilising the node, as it is common to use manually driven master changes to help with software upgrades and to carry out regular node shutdown and maintenance. Also, to reduce the risks associated with the uncoupled replication of the Artemis message data and the database I would recommend that we move the Artemis broker out of the node to allow us to create a failover cluster. This is also in line with the goal of creating AMQP bridges and Floats.

To this end I would suggest packages of work that include:

1. Move the broker out of the node, which will require having a protocol that can be used to signal bridge creation and which decouples the network map. This is in line with the Flow work anyway.
2. Create a mastering solution, probably using Atomix.IO although this might require a solution with a minimum of three nodes to avoid split brain issues. Ideally this service should be extensible in the future to lead towards an eventual state with Flow level sharding. Alternatively, we may be able to add a quick enterprise adaptor to ZooKeeper as master selector if time is tight (see the ZooKeeper-based sketch after this list). This will inevitably impact upon configuration and deployment support.
3. Test the leakage when we repeatedly start-stop the Node class and fix any resource leaks, or deadlocks that occur at shutdown.
4. Switch the Artemis client code to be able to use the HA mode connection type and thus take advantage of the rapid failover code. Also, ensure that we can support multiple public IP addresses reported in the network map.
5. Implement proper detection and handling of disconnect from the external database and/or Artemis broker, which should immediately drop the master status of the node and flush any incomplete flows.
6. We should start looking at how to make RPC proxies recover from disconnect/failover, although this is probably not a top priority. However, it would be good to capture the missed results of completed flows and ensure the API allows clients to unregister/re-register Observables.
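For item 2, a sketch of what a ZooKeeper-based master selector could look like is shown below, using the Apache Curator leader-latch recipe. The connection string, latch path and the activation hooks are assumptions for illustration, not a committed design.

```kotlin
import org.apache.curator.framework.CuratorFrameworkFactory
import org.apache.curator.framework.recipes.leader.LeaderLatch
import org.apache.curator.framework.recipes.leader.LeaderLatchListener
import org.apache.curator.retry.ExponentialBackoffRetry

fun main() {
    // Connect to a (hypothetical) three-node ZooKeeper ensemble to avoid split-brain.
    val client = CuratorFrameworkFactory.newClient(
        "zk1:2181,zk2:2181,zk3:2181",
        ExponentialBackoffRetry(1000, 3)
    )
    client.start()

    // All candidate nodes create a latch on the same path; ZooKeeper elects exactly one leader.
    val latch = LeaderLatch(client, "/corda/ha/master", "node-a")
    latch.addListener(object : LeaderLatchListener {
        override fun isLeader() {
            // Hypothetical hook: activate flow processing on becoming master.
            println("Elected master - activating flow processing")
        }

        override fun notLeader() {
            // Hypothetical hook: drop back to warm standby.
            println("Lost mastership - deactivating flow processing")
        }
    })
    latch.start()
}
```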
## The Future

Hopefully, most of the work from the automatic failover mode can be modified when we move to a full hot-hot sharding of flows across nodes. The mastering solution will need to be modified to negotiate finer-grained claims on individual flows, rather than stopping the whole node. Also, the routing of messages will have to be thought about so that they go to the correct node for processing, but fail over if the node dies. However, most of the other health monitoring and operational aspects should be reusable.

We also need to look at DR issues and in particular how we might handle asynchronous replication and possibly alternative recovery/reconciliation mechanisms.
50
docs/source/design/kafka-notary/decisions/index-storage.md
Normal file
@ -0,0 +1,50 @@

# Design Decision: Storage engine for committed state index

## Background / Context

The storage engine for the committed state index needs to support a single operation: "insert all values with unique keys, or abort if any key conflict found". A wide range of solutions could be used for that, from embedded key-value stores to full-fledged relational databases. However, since we don't need any extra features an RDBMS provides over a simple key-value store, we'll only consider lightweight embedded solutions to avoid extra operational costs.

Most RDBMSs are also generally optimised for read performance (they use B-tree based storage engines like InnoDB or MyISAM). Our workload is write-heavy and uses "random" primary keys (state references), which leads to particularly poor write performance for those types of engines – as we have seen with our Galera-based notary service. One exception is the MyRocks storage engine, which is based on RocksDB, can handle write workloads well, and is supported by Percona Server and MariaDB. It is easier, however, to just use RocksDB directly.

## Options Analysis

### A. RocksDB

An embedded key-value store based on log-structured merge-trees (LSM). It's highly configurable and provides lots of options for performance tuning, e.g. it can be tuned to run on different hardware – flash, hard disks or entirely in-memory.

### B. LMDB

An embedded key-value store using B+ trees, has ACID semantics and support for transactions.

### C. MapDB

An embedded Java database engine, providing persistent collection implementations. Uses memory mapped files. Simple to use, implements Java collection interfaces. Provides a HashMap implementation that we can use for storing committed states.

### D. MVStore

An embedded log structured key-value store. Provides a simple persistent map abstraction. Supports multiple map implementations (B-tree, R-tree, concurrent B-tree).

## Recommendation and justification

Performance test results when running on a Macbook Pro with Intel Core i7-4980HQ CPU @ 2.80GHz, 16 GB RAM, SSD:



Multiple tests were run with varying numbers of transactions and input states per transaction: "1m x 1" denotes a million transactions with one input state.

Proceed with Option A, as RocksDB provides the most tuning options and achieves by far the best write performance.

Note that the index storage engine can be replaced in the future with minimal changes required on the notary service.
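As a minimal sketch of the single required operation ("insert all values with unique keys, or abort on any conflict") against the RocksDB Java API, the fragment below assumes keys are serialised state references and values point back to request-log positions; the check-then-write is only safe because a worker applies requests sequentially, and the names and paths are illustrative.

```kotlin
import org.rocksdb.Options
import org.rocksdb.RocksDB
import org.rocksdb.WriteBatch
import org.rocksdb.WriteOptions

class StateConflictException(val conflictingKeys: List<ByteArray>) : Exception()

// Insert all (stateRef -> requestPosition) entries, or throw if any state reference
// has already been committed. Assumes single-threaded application of ordered requests.
fun commitStates(db: RocksDB, entries: Map<ByteArray, ByteArray>) {
    val conflicts = entries.keys.filter { db.get(it) != null }
    if (conflicts.isNotEmpty()) throw StateConflictException(conflicts)

    WriteBatch().use { batch ->
        entries.forEach { (key, value) -> batch.put(key, value) }
        db.write(WriteOptions(), batch)  // all entries land atomically
    }
}

fun main() {
    RocksDB.loadLibrary()
    Options().setCreateIfMissing(true).use { options ->
        RocksDB.open(options, "/var/data/committed-states").use { db ->
            commitStates(db, mapOf("stateRef-1".toByteArray() to "req-42".toByteArray()))
        }
    }
}
```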
@ -1,12 +1,10 @@



--------------------------------------------
Design Decision: Replication framework
================================
# Design Decision: Replication framework

## Background / Context

Multiple libraries/platforms exist for implementing fault-tolerant systems. In existing CFT notary implementations we experimented with using a traditional relational database with active replication, as well as a pure state machine replication approach based on CFT consensus algorithms.

## Options Analysis

@ -14,7 +12,12 @@ Multiple libraries/platforms exist for implementing fault-tolerant systems. In e

*Raft-based fault-tolerant distributed coordination framework.*

Our first CFT notary implementation was based on Atomix. Atomix can be easily embedded into a Corda node and provides abstractions for implementing custom replicated state machines. In our case the state machine manages committed Corda contract states. When notarisation requests are sent to Atomix, they get forwarded to the leader node. The leader persists the request to a log, and replicates it to all followers. Once the majority of followers acknowledge receipt, it applies the request to the user-defined state machine. In our case we commit all input states in the request to a JDBC-backed map, or return an error if conflicts occur.

#### Advantages

@ -32,7 +35,8 @@ Our first CFT notary implementation was based on Atomix. Atomix can be ea

*Java persistence layer with a built-in Raft-based replicated key-value store.*

Conceptually similar to Atomix, but persists the state machine instead of the request log. Built around an abstract persistent key-value store: requests get cleaned up after replication and processing.

#### Advantages

@ -52,7 +56,12 @@ Conceptually similar to Atomix, but persists the state machine instead of the re

*Paxos-based distributed streaming platform.*

Atomix and Permazen implement both the replicated request log and the state machine, but Kafka only provides the log component. In theory that means more complexity in having to implement request log processing and state machine management, but for our use case it's fairly straightforward: consume requests and insert input states into a database, marking the position of the last processed request. If the database is lost, we can just replay the log from the beginning. The main benefit of this approach is that it gives more granular control and performance tuning opportunities in different parts of the system.

#### Advantages

@ -67,11 +76,16 @@ Atomix and Permazen implement both the replicated request log and the state mach

### D. Custom Raft-based implementation

For even more granular control, we could replace Kafka with our own replicated log implementation. Kafka was started before the Raft consensus algorithm was introduced, and is using Zookeeper for coordination, which is based on Paxos for consensus. Paxos is known to be complex to understand and implement, and the main driver behind Raft was to create a much simpler algorithm with equivalent functionality. Hence, while reimplementing Zookeeper would be an onerous task, building a Raft-based alternative from scratch is somewhat feasible.

#### Advantages

Most of the implementations above have many extra features our use-case does not require. We can implement a relatively simple, clean, optimised solution that will most likely outperform others (Thomas Schroeter already built a prototype).

#### Disadvantages

@ -81,9 +95,17 @@ Large effort required to make it highly performant and reliable.

*Synchronous replication plugin for MySQL, uses certification-based replication.*

All of the options discussed so far were based on abstract state machine replication. Another approach is simply using a more traditional RDBMS with active replication support. Note that most relational databases support some form of replication in general, however, very few provide strong consistency guarantees and ensure no data loss. Galera is a plugin for MySQL enabling synchronous multi-master replication.

Galera uses certification-based replication, which operates on write-sets: a database server executes the (database) transaction, and only performs replication if the transaction requires write operations. If it does, the transaction is broadcast to all other servers (using atomic broadcast). On delivery, each server executes a deterministic certification phase, which decides if the transaction can commit or must abort. If a conflict occurs, the entire cluster rolls back the transaction. This type of technique is quite efficient in low-conflict situations and allows read scaling (the latter is mostly irrelevant for our use case).

#### Advantages

@ -100,7 +122,11 @@ Galera uses certification-based replication, which operates on write-sets: a dat

*Distributed SQL database built on a transactional and strongly-consistent key-value store. Uses Raft-based replication.*

On paper, CockroachDB looks like a great candidate, but it relies on sharding: data is automatically split into partitions, and each partition is replicated using Raft. It performs well for single-shard database transactions, and also natively supports cross-shard atomic commits. However, the majority of Corda transactions are likely to have more than one input state, which means that most transaction commits will require cross-shard database transactions. In our tests we were only able to achieve up to 30 TPS in a 3 DC deployment.

#### Advantages

@ -114,4 +140,5 @@ On paper, CockroachDB looks like a great candidate, but it relies on sharding: d

## Recommendation and justification

Proceed with Option C. A Kafka-based solution strikes the best balance between performance and the required effort to build a production-ready solution.
@ -1,46 +1,18 @@



# High Performance CFT Notary Service

DOCUMENT MANAGEMENT
---

## Document Control

| Title                | High Performance CFT Notary Service |
| -------------------- | ------------------------------------------------------------ |
| Date                 | 27 March 2018 |
| Author               | Andrius Dagys, Thomas Schroeter |
| Distribution         | Design Review Board, Product Management, Services - Technical (Consulting), Platform Delivery |
| Corda target version | Enterprise |
| JIRA reference       | https://r3-cev.atlassian.net/browse/CID-294 |

## Approvals

#### Document Sign-off

| Author            | Andrius Dagys |
| ----------------- | -------------------------------------------------- |
| Reviewer(s)       | (GitHub PR reviewers) |
| Final approver(s) | (GitHub PR approver(s) from Design Approval Board) |

#### Design Decisions

| Description | Recommendation | Approval |
| ---------------------------------------- | --------------- | ----------------------- |
| [Replication framework](decisions/replicated-storage.md) | Option C | (Design Approval Board) |
| [Index storage engine](decisions/index-storage.md) | Option A | (Design Approval Board) |

HIGH LEVEL DESIGN
---
.. important:: This design document describes a feature of Corda Enterprise.

## Overview

This proposal describes the architecture and an implementation for a high performance crash fault-tolerant notary service, operated by a single party.

## Background

For initial deployments, we expect to operate a single non-validating CFT notary service. The current Raft and Galera implementations cannot handle more than 100-200 TPS, which is likely to be a serious bottleneck in the near future. To support our clients and compete with other platforms we need a notary service that can handle TPS in the order of 1,000s.

## Scope
@ -69,28 +41,59 @@ The notary service should be able to:
- Tolerate single datacenter failure.
- Tolerate single disk failure/corruption.

.. toctree::
   :maxdepth: 2

   decisions/replicated-storage.md
   decisions/index-storage.md

## Target Solution

Having explored different solutions for implementing notaries we propose the following architecture for a CFT notary, consisting of two components:

1. A central replicated request log, which orders and stores all notarisation requests. Efficient append-only log storage can be used along with batched replication, making performance mainly dependent on network throughput.
2. Worker nodes that service clients and maintain a consumed state index. The state index is a simple key-value store containing committed state references and pointers to the corresponding request positions in the log. If lost, it can be reconstructed by replaying and applying request log entries. There is a range of fast key-value stores that can be used for implementation.



At a high level, client notarisation requests first get forwarded to a central replicated request log. The requests are then applied in order to the consumed state index in each worker to verify input state uniqueness. Each individual request outcome (success/conflict) is then sent back to the initiating client by the worker responsible for it. To emphasise, each worker will process _all_ notarisation requests, but only respond to the ones it received directly.
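A simplified sketch of the per-worker processing loop this describes is given below. The types, field names and response mechanism are illustrative placeholders, not the actual implementation.

```kotlin
// Illustrative worker loop: every worker applies all ordered requests to its local
// committed-state index, but only replies to the requests it accepted from clients itself.
data class NotarisationRequest(val requestId: String, val inputStates: List<String>, val receivedBy: String)

sealed class Outcome {
    object Success : Outcome()
    data class Conflict(val states: List<String>) : Outcome()
}

class Worker(private val workerId: String, private val index: MutableMap<String, String>) {
    fun process(request: NotarisationRequest): Outcome {
        val conflicts = request.inputStates.filter { it in index }
        val outcome = if (conflicts.isEmpty()) {
            request.inputStates.forEach { index[it] = request.requestId }  // commit the input states
            Outcome.Success
        } else {
            Outcome.Conflict(conflicts)
        }
        // Only the worker that originally received the request sends the response back.
        if (request.receivedBy == workerId) sendResponse(request.requestId, outcome)
        return outcome
    }

    private fun sendResponse(requestId: String, outcome: Outcome) {
        println("Responding to $requestId with $outcome")  // placeholder for the P2P reply
    }
}
```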
Messages (requests) in the request log are persisted and retained forever. The state index has a relatively low footprint and can in theory be kept entirely in memory. However, when a worker crashes, replaying the log to recover the index may take too long depending on the SLAs. Additionally, we expect applying the requests to the index to be much faster than consuming request batches even with persistence enabled.

_Technically_, the request log can also be kept entirely in memory, and the cluster will still be able to tolerate up to $f < n/2$ node failures (e.g. one failure in a three-node cluster, two in a five-node cluster). However, if for some reason the entire cluster is shut down (e.g. administrator error), all requests will be forever lost! Therefore, we should avoid it.

The request log does not need to be a separate cluster, and the worker nodes _could_ maintain the request log replicas locally. This would allow workers to consume ordered requests from the local copy rather than from a leader node across the network. It is hard to say, however, if this would have a significant performance impact without performing tests in the specific network environment (e.g. the bottleneck could be the replication step).

One advantage of hosting the request log in a separate cluster is that it makes it easier to independently scale the number of worker nodes. If, for example, transaction validation and resolution is required when receiving a notarisation request, we might find that a significant number of receivers is required to generate enough incoming traffic to the request log. On the flip side, increasing the number of workers adds additional consumers and load on the request log, so a balance needs to be found.

## Design Decisions

As the design decision documents below discuss, the most suitable platform for managing the request log was chosen to be [Apache Kafka](https://kafka.apache.org/), and [RocksDB](http://rocksdb.org/) as the storage engine for the committed state index.

| Heading | Recommendation |
| ---------------------------------------- | -------------- |
@ -106,13 +109,23 @@ A Kafka-based notary service does not deviate much from the high-level target so
|
||||
|
||||

|
||||
|
||||
For our purposes we can view Kafka as a replicated durable queue we can push messages (_records_) to and consume from. Consuming a record just increments the consumer's position pointer, and does not delete it. Old records eventually expire and get cleaned up, but the expiry time can be set to "indefinite" so all data is retained (it's a supported use-case).
|
||||
For our purposes we can view Kafka as a replicated durable queue we can push messages (_records_) to and consume from.
|
||||
Consuming a record just increments the consumer's position pointer, and does not delete it. Old records eventually
|
||||
expire and get cleaned up, but the expiry time can be set to "indefinite" so all data is retained (it's a supported
|
||||
use-case).
|
||||
|
||||
The main caveat is that Kafka does not allow consuming records from replicas directly – all communication has to be routed via a single leader node.
|
||||
The main caveat is that Kafka does not allow consuming records from replicas directly – all communication has to be
|
||||
routed via a single leader node.
|
||||
|
||||
In Kafka, logical queues are called _topics_. Each topic can be split into multiple partitions. Topics are assigned a _replication factor_, which specifies how many replicas Kafka should create for each partition. Each replicated partition has an assigned leader node which producers and consumers can connect to. Partitioning topics and evenly distributing partition leadership allows Kafka to scale well horizontally.
|
||||
In Kafka, logical queues are called _topics_. Each topic can be split into multiple partitions. Topics are assigned a
|
||||
_replication factor_, which specifies how many replicas Kafka should create for each partition. Each replicated
|
||||
partition has an assigned leader node which producers and consumers can connect to. Partitioning topics and evenly
|
||||
distributing partition leadership allows Kafka to scale well horizontally.
|
||||
|
||||
In our use-case, however, we can only use a single-partition topic for notarisation requests, which limits the total capacity and throughput to a single machine. Partitioning requests would break global transaction ordering guarantees for consumers. There is a [proposal](#kafka-throughput-scaling-via-partitioning) from Rick Parker on how we _could_ use partitioning to potentially avoid traffic contention on the single leader node.
|
||||
In our use-case, however, we can only use a single-partition topic for notarisation requests, which limits the total
|
||||
capacity and throughput to a single machine. Partitioning requests would break global transaction ordering guarantees
|
||||
for consumers. There is a [proposal](#kafka-throughput-scaling-via-partitioning) from Rick Parker on how we _could_ use
|
||||
partitioning to potentially avoid traffic contention on the single leader node.
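
To illustrate the single-partition constraint, the sketch below shows a producer appending requests and a worker replaying the log from a saved offset. This is an illustration under stated assumptions, not the actual implementation; the topic name, payload handling and processing logic are placeholders:

```kotlin
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import java.time.Duration

const val REQUEST_TOPIC = "notarisation-requests"   // hypothetical topic name

fun publishRequest(producer: KafkaProducer<String, ByteArray>, requestBytes: ByteArray) {
    // All requests go to partition 0 so every worker sees them in the same global order.
    producer.send(ProducerRecord(REQUEST_TOPIC, 0, null, requestBytes)).get()
}

fun replayFrom(consumer: KafkaConsumer<String, ByteArray>, offset: Long) {
    val partition = TopicPartition(REQUEST_TOPIC, 0)
    consumer.assign(listOf(partition))
    consumer.seek(partition, offset)              // resume from the last processed offset
    while (true) {
        val records = consumer.poll(Duration.ofMillis(500))
        records.forEach { record ->
            // Verify signatures, check the committed state index, commit input states...
        }
    }
}
```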
|
||||
|
||||
### Data model
|
||||
|
@ -1,41 +0,0 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Storage engine for committed state index
|
||||
============================================
|
||||
|
||||
## Background / Context
|
||||
|
||||
The storage engine for the committed state index needs to support a single operation: "insert all values with unique keys, or abort if any key conflict is found". A wide range of solutions could be used for that, from embedded key-value stores to full-fledged relational databases. However, since we don't need any extra features an RDBMS provides over a simple key-value store, we'll only consider lightweight embedded solutions to avoid extra operational costs.
|
||||
|
||||
Most RDBMSs are also generally optimised for read performance (they use B-tree-based storage engines such as InnoDB and MyISAM). Our workload is write-heavy and uses "random" primary keys (state references), which leads to particularly poor write performance for those types of engines – as we have seen with our Galera-based notary service. One exception is the MyRocks storage engine, which is based on RocksDB, handles write workloads well, and is supported by Percona Server and MariaDB. It is easier, however, to just use RocksDB directly.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
### A. RocksDB
|
||||
|
||||
An embedded key-value store based on log-structured merge-trees (LSM). It is highly configurable and provides many options for performance tuning; for example, it can be tuned to run on different hardware – flash, hard disks, or entirely in-memory.
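
As an illustration only (not part of the original decision document), a minimal sketch of the required "insert all keys or abort on conflict" operation on top of RocksDB's Java API might look as follows; it assumes a single writer and elides key encoding and error handling:

```kotlin
import org.rocksdb.Options
import org.rocksdb.RocksDB
import org.rocksdb.WriteBatch
import org.rocksdb.WriteOptions

class CommittedStateIndex(path: String) {
    init { RocksDB.loadLibrary() }
    private val db: RocksDB = RocksDB.open(Options().setCreateIfMissing(true), path)

    /** Commits all states atomically, or throws if any state reference is already spent. */
    fun commitAll(states: Map<ByteArray, ByteArray>) {     // state reference -> consuming tx id
        val conflicts = states.keys.filter { db.get(it) != null }
        if (conflicts.isNotEmpty()) throw IllegalStateException("Double spend attempt: $conflicts")
        val batch = WriteBatch()
        states.forEach { (ref, txId) -> batch.put(ref, txId) }
        db.write(WriteOptions(), batch)                     // single atomic write
    }
}
```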
|
||||
|
||||
### B. LMDB
|
||||
|
||||
An embedded key-value store using B+ trees, has ACID semantics and support for transactions.
|
||||
|
||||
### C. MapDB
|
||||
|
||||
An embedded Java database engine, providing persistent collection implementations. Uses memory mapped files. Simple to use, implements Java collection interfaces. Provides a HashMap implementation that we can use for storing committed states.
|
||||
|
||||
### D. MVStore
|
||||
|
||||
An embedded log-structured key-value store. It provides a simple persistent map abstraction and supports multiple map implementations (B-tree, R-tree, concurrent B-tree).
|
||||
|
||||
## Recommendation and justification
|
||||
|
||||
Performance test results when running on a Macbook Pro with Intel Core i7-4980HQ CPU @ 2.80GHz, 16 GB RAM, SSD:
|
||||
|
||||

|
||||
|
||||
Multiple tests were run with varying numbers of transactions and input states per transaction: "1m x 1" denotes a million transactions with one input state.
|
||||
|
||||
Proceed with Option A, as RocksDB provides most tuning options and achieves by far the best write performance.
|
||||
|
||||
Note that the index storage engine can be replaced in the future with minimal changes required on the notary service.
|
@ -1,76 +1,67 @@
|
||||

|
||||
|
||||
# Monitoring and Logging Design
|
||||
|
||||
DOCUMENT MANAGEMENT
|
||||
---
|
||||
|
||||
## Document Control
|
||||
|
||||
| Title | Monitoring and Logging |
|
||||
| -------------------- | ---------------------------------------- |
|
||||
| Date | 20th November 2017 |
|
||||
| Author | Jose Coll |
|
||||
| Distribution | Design Approval Board, DevOps, Platform Development (Data Deployment) |
|
||||
| Corda target version | Enterprise (primarily) |
|
||||
| JIRA reference | https://r3-cev.atlassian.net/browse/ENT-1109 |
|
||||
|
||||
## Approvals
|
||||
|
||||
#### Document Sign-off
|
||||
|
||||
| Author | Jose Coll |
|
||||
| ----------------- | ---------------------------------------- |
|
||||
| Reviewer(s) | DevOps, Product Management, Platform Development (Data Deployment) |
|
||||
| Final approver(s) | Design Approval Board (DAB) |
|
||||
|
||||
#### Design Decisions
|
||||
|
||||
| Description | Recommendation | Approval |
|
||||
| ---------------------------------------- | -------------- | -------- |
|
||||
| JMX for Eventing, SLF4J for Logging | JMX, SLF4J | |
|
||||
| Continue or discontinue usage of Jolokia? | TBC | |
|
||||
| Separation of Corda Node and CorDapp log outputs | TBC | |
|
||||
|
||||
## Document History
|
||||
|
||||
To be managed by GitHub revision control.
|
||||
|
||||
HIGH LEVEL DESIGN
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The successful deployment and operation of Corda (and associated CorDapps) in a Production environment requires a supporting monitoring and management capability to ensure that both a Corda node (and its supporting middleware infrastructure) and deployed CorDapps execute in a functionally correct and consistent manner. A pro-active monitoring solution will enable the immediate alerting of unexpected behaviours and associated management tooling should enable swift corrective action.
|
||||
The successful deployment and operation of Corda (and associated CorDapps) in a production environment requires a
|
||||
supporting monitoring and management capability to ensure that both a Corda node (and its supporting middleware
|
||||
infrastructure) and deployed CorDapps execute in a functionally correct and consistent manner. A pro-active monitoring
|
||||
solution will enable the immediate alerting of unexpected behaviours, and associated management tooling should enable
|
||||
swift corrective action.
|
||||
|
||||
This design defines the monitoring metrics and logging outputs, and associated implementation approach, required to enable a proactive enterprise management and monitoring solution of Corda nodes and their associated CorDapps. This also includes a set of "liveliness" checks to verify and validate correct functioning of a Corda node (and associated CorDapp).
|
||||
This design defines the monitoring metrics and logging outputs, and associated implementation approach, required to
|
||||
enable a proactive enterprise management and monitoring solution of Corda nodes and their associated CorDapps. This also
|
||||
includes a set of "liveliness" checks to verify and validate correct functioning of a Corda node (and associated
|
||||
CorDapp).
|
||||
|
||||

|
||||
|
||||
In the above diagram, the left handside dotted box represents the components within scope for this design. It is anticipated that 3rd party enterprise-wide system management solutions will closely follow the architectural component breakdown in the right handside box, and thus seamlessly integrate with the proposed Corda event generation and logging design. The interface between the two is de-coupled and based on textual log file parsing and adoption of industry standard JMX MBean events.
|
||||
In the above diagram, the left-hand dotted box represents the components within scope for this design. It is
|
||||
anticipated that 3rd party enterprise-wide system management solutions will closely follow the architectural component
|
||||
breakdown in the right-hand box, and thus seamlessly integrate with the proposed Corda event generation and logging
|
||||
design. The interface between the two is de-coupled and based on textual log file parsing and adoption of industry
|
||||
standard JMX MBean events.
|
||||
|
||||
## Background
|
||||
|
||||
Corda currently exposes several forms of monitorable content:
|
||||
|
||||
* Application log files using the [SLF4J](https://www.slf4j.org/) (Simple Logging Facade for Java) which provides an abstraction over various concrete logging frameworks (several of which are used within other Corda dependent 3rd party libraries). Corda itself uses the [Apache Log4j 2](https://logging.apache.org/log4j/2.x/) framework for logging output to a set of configured loggers (to include a rolling file appender and the console). Currently the same set of rolling log files are used by both the node and CorDapp(s) deployed to the node. The log file policy specifies a 60 day rolling period (but preserving the most recent 10Gb) with a maximum of 10 log files per day.
|
||||
* Application log files using the [SLF4J](https://www.slf4j.org/) (Simple Logging Facade for Java) which provides an
|
||||
abstraction over various concrete logging frameworks (several of which are used within other Corda dependent 3rd party
|
||||
libraries). Corda itself uses the [Apache Log4j 2](https://logging.apache.org/log4j/2.x/) framework for logging output
|
||||
to a set of configured loggers (to include a rolling file appender and the console). Currently the same set of rolling
|
||||
log files are used by both the node and CorDapp(s) deployed to the node. The log file policy specifies a 60 day
|
||||
rolling period (but preserving the most recent 10 GB) with a maximum of 10 log files per day.
|
||||
|
||||
* Industry standard exposed JMX-based metrics, both standard JVM and custom application metrics are exposed directly using the [Dropwizard.io](http://metrics.dropwizard.io/3.2.3/) *JmxReporter* facility. In addition Corda also uses the [Jolokia](https://jolokia.org/) framework to make these accesible over an HTTP endpoint. Typically, these metrics are also collated by 3rd party tools to provide pro-active monitoring, visualisation and re-active management.
|
||||
* Industry standard exposed JMX-based metrics, both standard JVM and custom application metrics are exposed directly
|
||||
using the [Dropwizard.io](http://metrics.dropwizard.io/3.2.3/) *JmxReporter* facility. In addition Corda also uses the
|
||||
[Jolokia](https://jolokia.org/) framework to make these accessible over an HTTP endpoint. Typically, these metrics are
|
||||
also collated by 3rd party tools to provide pro-active monitoring, visualisation and re-active management.
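
As a hedged sketch of how a custom application metric could be registered and exposed as a JMX MBean via the Dropwizard library (the metric and domain names below are illustrative, not the actual Corda names):

```kotlin
import com.codahale.metrics.JmxReporter
import com.codahale.metrics.MetricRegistry

fun startNodeMetrics(): MetricRegistry {
    val registry = MetricRegistry()
    // Example custom metrics; names are illustrative only.
    val flowsStarted = registry.counter("Flows.Started")
    val txCommitTimer = registry.timer("Transactions.CommitDuration")

    // Expose everything in the registry as JMX MBeans under a common domain.
    JmxReporter.forRegistry(registry)
        .inDomain("net.corda.monitoring")
        .build()
        .start()

    flowsStarted.inc()
    txCommitTimer.time().use { /* work being measured */ }
    return registry
}
```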
|
||||
|
||||
A full list of currently exposed metrics can be found in the appendix A.
|
||||
|
||||
The Corda flow framework also has *placeholder* support for recording additional Audit data in application flows using a simple *AuditService*. Audit event types are currently loosely defined and data is stored in string form (as a description and contextual map of name-value pairs) together with a timestamp and principal name. This service does not currently have an implementation of the audit event data to a persistent store.
|
||||
The Corda flow framework also has *placeholder* support for recording additional Audit data in application flows using a
|
||||
simple *AuditService*. Audit event types are currently loosely defined and data is stored in string form (as a
|
||||
description and contextual map of name-value pairs) together with a timestamp and principal name. This service does not
|
||||
currently have an implementation that writes the audit event data to a persistent store.
|
||||
|
||||
The `ProgressTracker` component is used to report the progress of a flow throughout its business lifecycle, and is typically configured to report the start of a specific business workflow step (often before and after message send and receipt where other participants form part of a multi-staged business workflow). The progress tracking framework was designed to become a vital part of how exceptions, errors, and other faults are surfaced to human operators for investigation and resolution. It provides a means of exporting progress as a hierachy of steps in a way that’s both human readable and machine readable.
|
||||
The `ProgressTracker` component is used to report the progress of a flow throughout its business lifecycle, and is
|
||||
typically configured to report the start of a specific business workflow step (often before and after message send and
|
||||
receipt where other participants form part of a multi-staged business workflow). The progress tracking framework was
|
||||
designed to become a vital part of how exceptions, errors, and other faults are surfaced to human operators for
|
||||
investigation and resolution. It provides a means of exporting progress as a hierarchy of steps in a way that’s both
|
||||
human readable and machine readable.
|
||||
|
||||
In addition, in-house Corda networks at R3 use the following tools:
|
||||
|
||||
* Standard [DataDog](https://docs.datadoghq.com/guides/overview/) probes are currently used to provide e-mail based alerting for running Corda nodes. [Telegraf](https://github.com/influxdata/telegraf) is used in conjunction with a [Jolokia agent](https://jolokia.org/agent.html) as a collector to parse emitted metric data and push these to DataDog.
|
||||
* Investigation is underway to evaluate [ELK](https://logz.io/learn/complete-guide-elk-stack/) as a mechanism for parsing, indexing, storing, searching, and visualising log file data.
|
||||
* Standard [DataDog](https://docs.datadoghq.com/guides/overview/) probes are currently used to provide e-mail based
|
||||
alerting for running Corda nodes. [Telegraf](https://github.com/influxdata/telegraf) is used in conjunction with a
|
||||
[Jolokia agent](https://jolokia.org/agent.html) as a collector to parse emitted metric data and push these to DataDog.
|
||||
* Investigation is underway to evaluate [ELK](https://logz.io/learn/complete-guide-elk-stack/) as a mechanism for parsing,
|
||||
indexing, storing, searching, and visualising log file data.
|
||||
|
||||
## Scope
|
||||
|
||||
#### Goals
|
||||
### Goals
|
||||
|
||||
- Add new metrics at the level of a Corda node, individual CorDapps, and other supporting Corda components (float, bridge manager, doorman)
|
||||
- Support liveness checking of the node, deployed flows and services
|
||||
@ -79,17 +70,12 @@ In addition, in-house Corda networks at R3 use the following tools:
|
||||
- Implement the audit framework that is currently only a stubbed out API
|
||||
- Ensure that Corda can be used with third party systems for monitoring, log collection and audit
|
||||
|
||||
#### Out of scope
|
||||
### Out of scope
|
||||
|
||||
- Recommendation of a specific set of monitoring tools.
|
||||
- Monitoring of network infrastructure like the network map service.
|
||||
- Monitoring of liveness of peers.
|
||||
|
||||
#### Reference(s) to similar work
|
||||
|
||||
* [Flow Audit Logging and Management Design](https://r3-cev.atlassian.net/wiki/spaces/AR/pages/127180188/Flow+Audit+Logging+and+Management+Design) - this proposal from April 17 also includes a prototype specification of an [Audit API](https://github.com/corda/corda/pull/620).
|
||||
* [Corda Support - Monitoring Requirements Guide](https://r3-cev.atlassian.net/wiki/spaces/CCD/pages/131398183/Support+Team+Monitoring+Requirements?preview=/131398183/131398332/monitoring_requirements.v1.0.docx)
|
||||
|
||||
## Requirements
|
||||
|
||||
Expanding on the first goal identified above, the following requirements have been identified:
|
||||
@ -129,19 +115,22 @@ Expanding on the first goal identified above, the following requirements have be
|
||||
|
||||
#### Use Cases
|
||||
|
||||
It is envisaged that operational management and support teams will use the metrics and information collated from this design, either directly or through an integrated enterprise-wide systems management platform, to perform the following:
|
||||
It is envisaged that operational management and support teams will use the metrics and information collated from this
|
||||
design, either directly or through an integrated enterprise-wide systems management platform, to perform the following:
|
||||
|
||||
- Validate liveness and correctness of Corda nodes and deployed CorDapps, and the physical machine or VM they are hosted on.
|
||||
|
||||
* Use logging to troubleshoot operational failures (in conjunction with other supporting failure information: eg. GC logs, stack traces)
|
||||
* Use reported metrics to fine-tune and tweak operational systems parameters (including dynamic setting of logging modules and severity levels to enable detailed logging).
|
||||
* Use reported metrics to fine-tune and tweak operational systems parameters (including dynamic setting of logging
|
||||
modules and severity levels to enable detailed logging).
|
||||
|
||||
## Design Decisions
|
||||
|
||||
The following design decisions are to be confirmed:
|
||||
|
||||
1. JMX for metric eventing and SLF4J for logging
|
||||
Both above are widely adopted mechanisms that enable pluggability and seamless inteoperability with other 3rd party enterprise-wide system management solutions.
|
||||
Both of the above are widely adopted mechanisms that enable pluggability and seamless interoperability with other 3rd party
|
||||
enterprise-wide system management solutions.
|
||||
2. Continue or discontinue usage of Jolokia? (TBC - most likely yes, subject to read-only security lock-down)
|
||||
3. Separation of Corda Node and CorDapp log outputs (TBC)
|
||||
|
||||
@ -149,39 +138,59 @@ The following design decisions are to be confirmed:
|
||||
|
||||
There are a number of activities and parts to the solution proposal:
|
||||
|
||||
1. Extend JMX metric reporting through the Corda Monitoring Service and associated jolokia conversion to REST/JSON) coverage (see implementation details) to include all Corda services (vault, key management, transaction storage, network map, attachment storage, identity, cordapp provision) & subsytems components (state machine)
|
||||
1. Extend JMX metric reporting through the Corda Monitoring Service (and associated Jolokia conversion to REST/JSON)
|
||||
coverage (see implementation details) to include all Corda services (vault, key management, transaction storage,
|
||||
network map, attachment storage, identity, CorDapp provision) and subsystem components (state machine)
|
||||
|
||||
2. Review and extend Corda log4j2 coverage (see implementation details) to ensure
|
||||
|
||||
- consistent use of severities according to situation
|
||||
- consistent coverage across all modules and libraries
|
||||
- consistent output format with all relevant contextual information (node identity, user/execution identity, flow session identity, version information)
|
||||
- consistent output format with all relevant contextual information (node identity, user/execution identity, flow
|
||||
session identity, version information)
|
||||
- separation of Corda Node and CorDapp log outputs (TBC)
|
||||
For consistent interleaving reasons, it may be desirable to continue using combined log output.
|
||||
|
||||
Publication of a *code style guide* to define when to use different severity levels.
|
||||
|
||||
3. Implement a CorDapp to perform sanity checking of flow framework, fundamental corda services (vault, identity), and dependent middleware infrastructure (message broker, database).
|
||||
[TBC]
|
||||
3. Implement a CorDapp to perform sanity checking of flow framework, fundamental corda services (vault, identity), and
|
||||
dependent middleware infrastructure (message broker, database).
|
||||
|
||||
4. Revisit and enhance as necessary the [Audit service API]( https://github.com/corda/corda/pull/620 ), and provide a persistent backed implementation, to include:
|
||||
4. Revisit and enhance as necessary the [Audit service API]( https://github.com/corda/corda/pull/620 ), and provide a
|
||||
persistent backed implementation, to include:
|
||||
|
||||
- specification of Business Event Categories (eg. User authentication and authorisation, Flow-based triggering, Corda Service invocations, Oracle invocations, Flow-based send/receive calls, RPC invocations)
|
||||
- specification of Business Event Categories (eg. User authentication and authorisation, Flow-based triggering, Corda
|
||||
Service invocations, Oracle invocations, Flow-based send/receive calls, RPC invocations)
|
||||
- auto-enabled with Progress Tracker as Business Event generator
|
||||
- RDBMS backed persistent store (independent of Corda database), with adequate security controls (authenticated access and read-only permissioning). Captured information should be consistent with standard logging, and it may be desirable to define auditable loggers within log4j2 to automatically redirect certain types of log events to the audit service.
|
||||
- RDBMS backed persistent store (independent of Corda database), with adequate security controls (authenticated access
|
||||
and read-only permissioning). Captured information should be consistent with standard logging, and it may be desirable
|
||||
to define auditable loggers within log4j2 to automatically redirect certain types of log events to the audit service.
|
||||
|
||||
5. Ensure 3rd party middleware drivers (JDBC for database, MQ for messaging) and the JVM are correctly configured to export JMX metrics. Ensure the [JVM Hotspot VM command-line parameters](https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/clopts001.html) are tuned correctly to enable detailed troubleshooting upon failure. Many of these metrics are already automatically exposed to 3rd party profiling tools such as Yourkit.
|
||||
5. Ensure 3rd party middleware drivers (JDBC for database, MQ for messaging) and the JVM are correctly configured to export
|
||||
JMX metrics. Ensure the [JVM Hotspot VM command-line parameters](https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/clopts001.html)
|
||||
are tuned correctly to enable detailed troubleshooting upon failure. Many of these metrics are already automatically
|
||||
exposed to 3rd party profiling tools such as Yourkit.
|
||||
|
||||
Apache Artemis has a comprehensive [management API](https://activemq.apache.org/artemis/docs/latest/management.html) that allows a user to modify a server configuration, create new resources (e.g. addresses and queues), inspect these resources (e.g. how many messages are currently held in a queue) and interact with it (e.g. to remove messages from a queue), and exposes key metrics using JMX (using role-based authentication using Artemis's JAAS plug-in support to ensure Artemis cannot be controlled via JMX)..
|
||||
Apache Artemis has a comprehensive [management API](https://activemq.apache.org/artemis/docs/latest/management.html)
|
||||
that allows a user to modify a server configuration, create new resources (e.g. addresses and queues), inspect these
|
||||
resources (e.g. how many messages are currently held in a queue) and interact with it (e.g. to remove messages from a
|
||||
queue), and exposes key metrics via JMX (with role-based authentication through Artemis's JAAS plug-in support to
|
||||
ensure Artemis cannot be controlled via JMX).
|
||||
|
||||
##### Restrictions
|
||||
|
||||
- As of Corda M11, Java serialisation in the Corda node has been restricted, meaning MBeans access via the JMX port will no longer work.
|
||||
- Usage of Jolokia requires bundling an associated *jolokia-agent-war* file on the classpath, and associated configuration to export JMX monitoring statistics and data over the Jolokia REST/JSON interface. An associated *jolokia-access.xml* configuration file defines role based permissioning to HTTP operations.
|
||||
As of Corda M11, Java serialisation in the Corda node has been restricted, meaning MBeans access via the JMX port will no longer work.
|
||||
|
||||
Usage of Jolokia requires bundling an associated *jolokia-agent-war* file on the classpath, and associated configuration
|
||||
to export JMX monitoring statistics and data over the Jolokia REST/JSON interface. An associated *jolokia-access.xml*
|
||||
configuration file defines role based permissioning to HTTP operations.
|
||||
|
||||
## Complementary solutions
|
||||
|
||||
A number of 3rd party libraries and frameworks have been proposed which solve different parts of the end to end solution, albeit with most focusing on the Agent Collector (eg. collect metrics from systems then output them to some backend storage.), Event Storage and Search, and Visualization aspects of Systems Management and Monitoring. These include:
|
||||
A number of 3rd party libraries and frameworks have been proposed which solve different parts of the end to end
|
||||
solution, albeit with most focusing on the Agent Collector (e.g. collecting metrics from systems and outputting them to some
|
||||
backend storage), Event Storage and Search, and Visualisation aspects of Systems Management and Monitoring. These
|
||||
include:
|
||||
|
||||
| Solution | Type (OS/£) | Description |
|
||||
| ---------------------------------------- | ----------- | ---------------------------------------- |
|
||||
@ -198,17 +207,14 @@ A number of 3rd party libraries and frameworks have been proposed which solve di
|
||||
|
||||
Most of the above solutions are not within the scope of this design proposal, but should be capable of ingesting the outputs (logging and metrics) defined by this design.
|
||||
|
||||
TECHNICAL DESIGN
|
||||
---
|
||||
## Technical design
|
||||
|
||||
In general, the requirements outlined in this design are cross-cutting concerns which affect the Corda codebase holistically, both for logging and capture/export of JMX metrics.
|
||||
|
||||
## Interfaces
|
||||
### Interfaces
|
||||
|
||||
* Public APIs impacted
|
||||
* No Public APIs are impacted.
|
||||
|
||||
|
||||
* Internal APIs impacted
|
||||
* No identified internal APIs are impacted.
|
||||
* Services impacted:
|
||||
@ -228,7 +234,7 @@ In general, the requirements outlined in this design are cross-cutting concerns
|
||||
* Modules impacted
|
||||
* All modules packaged and shipped as part of a Corda distribution (as published to Artifactory / Maven): *core, node, node-api, node-driver, finance, confidential-identities, test-common, test-utils, verifier, webserver, jackson, jfx, mock, rpc*
|
||||
|
||||
## Functional
|
||||
### Functional
|
||||
|
||||
#### Health Checker
|
||||
|
||||
@ -393,10 +399,7 @@ See Appendix C for summary of current Logging and Progress Tracker Reporting cov
|
||||
|
||||
[VisualVM](http://visualvm.github.io/) is a visual tool integrating commandline JDK tools and lightweight profiling capabilities.
|
||||
|
||||
APPENDICES
|
||||
---
|
||||
|
||||
### Appendix A - Corda exposed JMX Metrics
|
||||
## Appendix A - Corda exposed JMX Metrics
|
||||
|
||||
The following metrics are exposed directly by a Corda Node at run-time:
|
||||
|
||||
@ -418,7 +421,7 @@ The following metrics are exposed directly by a Corda Node at run-time:
|
||||
|
||||
Additionally, JMX metrics are generated within the Corda *node-driver* performance testing utilities. Specifically, the `startPublishingFixedRateInjector` defines and exposes `QueueSize` and `WorkDuration` metrics.
|
||||
|
||||
### Appendix B - Corda Logging and Reporting coverage
|
||||
## Appendix B - Corda Logging and Reporting coverage
|
||||
|
||||
Primary node services exposed publicly via ServiceHub (SH) or internally by ServiceHubInternal (SHI):
|
||||
|
||||
@ -501,7 +504,7 @@ Confidential identities flows:
|
||||
| IdentitySyncFlow.Send | none | IllegalArgumentException via `require` assertions, IllegalStateException | SYNCING_IDENTITIES |
|
||||
| IdentitySyncFlow.Receive | none | CertificateExpiredException, CertificateNotYetValidException, InvalidAlgorithmParameterException | RECEIVING_IDENTITIES, RECEIVING_CERTIFICATES |
|
||||
|
||||
#####Appendix C - Apache Artemis JMX Event types and Queuing Metrics.
|
||||
## Appendix C - Apache Artemis JMX Event Types and Queuing Metrics
|
||||
|
||||
The following table contains a list of Notification Types and associated perceived importance to a Corda node at run-time:
|
||||
|
@ -1,12 +1,9 @@
|
||||

|
||||
|
||||
--------------------------------------------
|
||||
Design Decision: Notary Backend - Galera or Permazen Raft
|
||||
=========================================================
|
||||
# Design Decision: Notary Backend - Galera or Permazen Raft
|
||||
|
||||
## Background / Context
|
||||
|
||||
We have evaluated Galera and Permazen as a possible replacement for Atomix CopyCat for the storage backend of our Notary Service, more specificalyl the Uniqueness Provider.
|
||||
We have evaluated Galera and Permazen as a possible replacement for Atomix CopyCat for the storage backend of our Notary
|
||||
Service, more specifically the Uniqueness Provider.
|
||||
|
||||
## Options Analysis
|
||||
|
||||
@ -14,7 +11,8 @@ We have evaluated Galera and Permazen as a possible replacement for Atomix CopyC
|
||||
|
||||
#### Advantages
|
||||
|
||||
1. Wider user base. In a survey of 478 OpenStack deployments, 32% decided to use Galera Cluster in production, see p. 47 of the [survey](https://www.openstack.org/assets/survey/April2017SurveyReport.pdf).
|
||||
1. Wider user base. In a survey of 478 OpenStack deployments, 32% decided to use Galera Cluster in production, see p. 47
|
||||
of the [survey](https://www.openstack.org/assets/survey/April2017SurveyReport.pdf).
|
||||
|
||||
2. Very little additional work needed.
|
||||
|
226
docs/source/design/notary-service-ha/design.md
Normal file
@ -0,0 +1,226 @@
|
||||
# HA Notary Service
|
||||
|
||||
## Overview
|
||||
|
||||
The distributed notary service tracks spent contract states and prevents double spending. For high-availability (HA),
|
||||
the backing data store is replicated across a cluster of machines in different data centers. In this model, the cluster
|
||||
is meant to be operated by a single party, and only crash faults are tolerated.
|
||||
|
||||
## Background
|
||||
|
||||
We have an existing HA notary service based on Atomix CopyCat, which is an open source state machine replication library
|
||||
that implements the Raft consensus algorithm. However, it doesn't scale well with the number of spent input states, since
|
||||
CopyCat takes periodic snapshots of the state machine and the snapshots have to fit in memory.
|
||||
|
||||
As an alternative, we propose using a more traditional MySQL database-based approach, using Galera Cluster, which
|
||||
provides synchronous multi-master replication. Galera Cluster is based on a MySQL server with Write-Set replication
|
||||
(wsrep) API, and the Galera Replication Plugin. Through the wsrep API Galera provides [certification-based replication](http://galeracluster.com/documentation-webpages/certificationbasedreplication.html). It works roughly as
|
||||
follows:
|
||||
|
||||
1. A single database node executes a transaction optimistically until it reaches the commit point.
|
||||
2. Changes made by the transaction are collected into a write-set.
|
||||
3. The write-set is broadcast to the cluster.
|
||||
4. Every other node determines whether it can apply the write-set without conflicts.
|
||||
5. In case of conflict, the initial node rolls back the transaction.
|
||||
|
||||
There are different Galera Cluster implementations, and we chose the Percona XtraDB cluster, as they were historically
|
||||
more focused on performance than the competition.
|
||||
|
||||
### Decisions
|
||||
|
||||
- We are replacing the Atomix CopyCat Raft service.
|
||||
- We are using a Percona cluster for Corda Connect.
|
||||
- We keep investigating a more scalable solution, based on Permazen or a custom implementation.
|
||||
- In the long term, we are interested in providing a BFT solution, perhaps leveraging SGX.
|
||||
|
||||
.. toctree::
|
||||
|
||||
decisions/decision.md
|
||||
|
||||
#### Advantages of Percona
|
||||
|
||||
- Production ready
|
||||
- Works out of the box
|
||||
- Backed by a company; enterprise and community support are available
|
||||
- Runs stably at 30 tx/second (with 10 input states / tx); see the figure below in the section about the long-running test
|
||||
|
||||
#### Disadvantages of Percona
|
||||
|
||||
- Performance deteriorates over time. This happens because Galera only works with the InnoDB storage engine, internally
|
||||
backed by a B+ tree. Since we use state references as primary keys, table inserts result in random B+ tree inserts,
|
||||
which doesn't scale well.
|
||||
|
||||
## Scope
|
||||
|
||||
### Goals
|
||||
|
||||
* We need a stable notary implementation.
|
||||
* The implementation has to be easy to operate.
|
||||
* We know that the switching costs to a more scalable solution are minimal.
|
||||
* We take periodic backups of the consumed states and we test the recovery.
|
||||
* We remain flexible and open to future requirements.
|
||||
|
||||
### Non-Goals
|
||||
|
||||
* For the time being, we don't need a solution that is shardable (for now, all replicas can hold all the state).
|
||||
* We don't require a solution that can handle throughput beyond 15 tx/second.
|
||||
* We don't design and implement a custom solution in the short term.
|
||||
* We don't need rate limiting and fairness.
|
||||
|
||||
## Design
|
||||
|
||||

|
||||
|
||||
The HA notary service relies on the underlying MySQL uniqueness provider on top of a Percona XtraDB Cluster to prevent
|
||||
double spending of input states. The exact data center locations are to be determined. Our notary service replicas
|
||||
connect via JDBC to the replicated MySQL service.
|
||||
|
||||
Percona XtraDB Cluster is based on Percona Server and the Galera replication library that provides a multi master
|
||||
cluster based on synchronous replication. The cluster is only as fast as its slowest node.
|
||||
|
||||
## Main Data Structure
|
||||
|
||||
The table below details the database schema.
|
||||
|
||||
| Field name | Type | Description |
|
||||
| --------------------- | ------------ | ---------------------------------------- |
|
||||
| issue_tx_id | Binary(32) | The ID of the transaction that created the state |
|
||||
| issue_tx_output_id | Int unsigned | Where in the transaction the state was created |
|
||||
| consuming_tx_id | Binary(32) | The ID of the transaction that consumes the input state |
|
||||
| consuming_tx_input_id | Int unsigned | Where in the transaction the state is consumed |
|
||||
| consuming_party | Blob | Who is requesting the notarisation (~1 kByte) |
|
||||
| commit_time | Timestamp | When this row is committed |
|
||||
|
||||
## Functional
|
||||
|
||||
The notary service relies on the MySQL uniqueness provider to prevent double spending. The MySQL database holds a single
|
||||
table as described above. For HA, the data is synchronously replicated to several nodes by the Galera replication
|
||||
plugin.
|
||||
|
||||
During notarisation, the uniqueness provider attempts to commit all input states of the Corda transaction in a single
|
||||
database transaction. If at least one input state has been previously spent, the entire database transaction fails with
|
||||
a batch exception. Unspent states can still be spent in a different later transaction. In case of double spend attempts,
|
||||
the uniqueness provider queries the database for details of where the conflicting states have been spent. The consuming
|
||||
transaction ID, position of the input in the transaction and the requesting party are collected for all conflicting
|
||||
inputs and wrapped in a uniqueness exception thrown by the uniqueness provider. This exception is handled by the notary
|
||||
service and turned into a notary exception.
|
||||
|
||||
We are using the Hikari connection pool to connect the notary services to all nodes of our Percona cluster. The
|
||||
connection pool can be monitored via JMX.
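
A minimal sketch of how this commit path might look over JDBC with a Hikari pool is shown below. It is an illustration under stated assumptions rather than the actual implementation: the table name `notarised_states`, connection details and column order are placeholders that follow the schema table above.

```kotlin
import com.zaxxer.hikari.HikariConfig
import com.zaxxer.hikari.HikariDataSource
import java.sql.BatchUpdateException

val dataSource = HikariDataSource(HikariConfig().apply {
    jdbcUrl = "jdbc:mysql://percona-node-1:3306/notary"   // illustrative cluster node address
    username = "notary"
    password = "..."
    maximumPoolSize = 10
})

/** Attempts to spend all inputs in one database transaction; throws on any double spend. */
fun commitInputStates(inputs: List<Pair<ByteArray, Int>>, consumingTxId: ByteArray, requestingParty: ByteArray) {
    dataSource.connection.use { connection ->
        connection.autoCommit = false
        val insert = connection.prepareStatement(
            "INSERT INTO notarised_states " +
            "(issue_tx_id, issue_tx_output_id, consuming_tx_id, consuming_tx_input_id, consuming_party) " +
            "VALUES (?, ?, ?, ?, ?)"
        )
        try {
            inputs.forEachIndexed { index, (issueTxId, outputIndex) ->
                insert.setBytes(1, issueTxId)
                insert.setInt(2, outputIndex)
                insert.setBytes(3, consumingTxId)
                insert.setInt(4, index)
                insert.setBytes(5, requestingParty)
                insert.addBatch()
            }
            insert.executeBatch()    // fails with a batch exception if any primary key already exists
            connection.commit()
        } catch (e: BatchUpdateException) {
            connection.rollback()
            // Query the table for the conflicting rows and surface them as a notary conflict.
            throw e
        }
    }
}
```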
|
||||
|
||||
### Deployment
|
||||
|
||||
We are planning to run a five node Percona cluster that can tolerate two simultaneous node failures. In case we need to
|
||||
provide more storage or upgrade to better hardware we can take a single node down for maintenance and still tolerate one
|
||||
unplanned failure.
|
||||
|
||||
#### Monitoring cluster membership changes
|
||||
|
||||
We set up a [notification command](http://galeracluster.com/documentation-webpages/notificationcmd.html) that gets called
|
||||
whenever the node registers a change.
|
||||
|
||||
### Management
|
||||
|
||||
#### Disaster Recovery
|
||||
|
||||
Our disaster recovery strategy covers the following risks:
|
||||
1. **Host Failure**. For the 5 node cluster we can tolerate 2 host failures without interrupting operation. This includes both machine and disk failures.
|
||||
2. **DC Failure**. The cluster will be distributed across 3 data centers in a 2+2+1 configuration. A loss of one data center can be tolerated without interrupting service operation.
|
||||
3. **Data Corruption/Loss**. In cases of data corruption or loss that is replicated across the cluster (for example, accidental data deletion or modification by an administrator) backups will be used to restore the cluster state. In this scenario service downtime will be incurred.
|
||||
|
||||
#### Backup and Recovery
|
||||
|
||||
Recovery Point Objective: 0
|
||||
|
||||
Recovery Time Objective: 1h
|
||||
|
||||
Any data loss incurred by the notary service will lead to a compromised ledger, since participants would be able to
|
||||
double-spend already notarised states. Note that the backup & recovery procedure is only required for mitigating data
|
||||
loss that gets replicated to the entire cluster.
|
||||
|
||||
This can be achieved by combining periodic backups of the entire database state, and the MySQL [binary
|
||||
log](https://dev.mysql.com/doc/refman/5.7/en/binary-log.html). The binary log contains a log of all executed SQL
|
||||
statements, which can be replayed onto a backup to restore the most up-to-date state. In case of an accidental statement
|
||||
that removes data (e.g. DROP TABLE), the binary log can be replayed only up to the offending statement.
|
||||
|
||||
Scenarios where data corruption is caused by a malicious administrator selectively modifying or removing table rows are
|
||||
out of scope.
|
||||
|
||||
See [Galera's backup documentation](http://galeracluster.com/documentation-webpages/backingupthecluster.html)
|
||||
|
||||
#### Monitoring
|
||||
|
||||
See the [Percona Management and Monitoring](https://www.percona.com/doc/percona-monitoring-and-management/index.html) documentation.
|
||||
|
||||
* Throughput in Tx / second
|
||||
* Throughput in Input states / second
|
||||
* Double spend attempts / time
|
||||
* High level statistics, e.g. number of double spend attempts in the last 24 hours by two parties
|
||||
* Double spend attempts per party
|
||||
* Latency p50, p99
|
||||
* Number of input states in DB
|
||||
* Size of DB
|
||||
* Replication Queues, see [monitoring Galera](http://galeracluster.com/documentation-webpages/monitoringthecluster.html)
|
||||
|
||||
#### Alerting
|
||||
|
||||
Alerts are triggered based on relevant metrics, like number of active members in the cluster and size of write queues of
|
||||
individual nodes. We are configuring PMM to forward alerts to PagerDuty, where we do the routing to the operators who
|
||||
are on call. We configure email alerting and Slack integration as additional channels.
|
||||
|
||||
## Security
|
||||
|
||||
SSL-encrypted links are used between the nodes of the Galera cluster, and between the notary service and the Galera cluster. See the [SSL
|
||||
config documentation](http://galeracluster.com/documentation-webpages/sslconfig.html).
|
||||
|
||||
The managed disks on Azure [are encrypted](https://azure.microsoft.com/en-gb/blog/azure-managed-disks-sse/) with keys
|
||||
managed by Microsoft. We have to trust our cloud provider anyway, so we don't do our own disk encryption.
|
||||
|
||||
## Testing the throughput of the uniqueness provider
|
||||
|
||||
We are using a custom load test flow that includes double spend attempts. The application metrics are forwarded to
|
||||
Graphite and our Percona cluster is monitored by Percona's metrics and monitoring tool (PMM).
|
||||
|
||||
In our tests, the number of input states is Poisson-distributed with an average of four input states per transaction. To
|
||||
increase throughput in terms of notarised input states per second, we could batch transactions in the future. We tested
|
||||
batching with batch sizes of up to 1000 input states per batch and reached a throughput of 2k input states / second for
|
||||
batch sizes 250-1000. When we detect a double spend attempt, we could send through individual transactions to find the
|
||||
source of the conflict or bisect the batch.
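
For illustration, a load generator could draw the number of input states per transaction with a simple Poisson sampler such as the sketch below (Knuth's algorithm, adequate for small means like four); this is not the actual load-test code:

```kotlin
import kotlin.math.exp
import kotlin.random.Random

/** Draws a Poisson-distributed value using Knuth's algorithm (fine for small means). */
fun poisson(mean: Double, random: Random = Random.Default): Int {
    val limit = exp(-mean)
    var k = 0
    var p = 1.0
    do {
        k++
        p *= random.nextDouble()
    } while (p > limit)
    return k - 1
}

// Example: number of input states for each generated test transaction.
val inputStateCounts = List(10) { poisson(4.0) }
```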
|
||||
|
||||
## Long running test
|
||||
|
||||

|
||||
|
||||
The figure above shows the throughput in transactions per second over four days, while writing to the cluster with up to
|
||||
three clients. The dips occur while we take nodes off-line to simulate failure and to upgrade the disks. In the last
|
||||
phase of the test all nodes were equipped with managed 1TB SSDs and the cluster notarised at more than 300 input
|
||||
states per second while holding more than 100 M input states in the DB.
|
||||
|
||||
Glitches in throughput can occur when the write queue of a node is filling up. I'm assuming this is due to increased
|
||||
disk latency when the cloud SAN disk is busy with other operations. When the maximum write queue size is reached, the
|
||||
slow node isn't accepting writes any more and sends out flow control messages to its peers to stop replicating (I'm
|
||||
assuming this leads to messages being queued in their send queue). The queue sizes are monitored by the PMM tool and we
|
||||
can set up alerts based on a configured maximum write queue size or when we see "flow control messages".
|
||||
|
||||
We found that managed SSDs of 1TB in size performed better than a RAID 10 array of four 128GB SSDs. The latency of the
|
||||
1TB SSDs was stable around 8ms, while we have observed latency spikes up to 64ms on the smaller SSDs. The disk load on
|
||||
the slowest node in terms of disk latency was around 6-8 outstanding writes during the last phase of the test. Setting
|
||||
up a RAID 10 was a mistake; for best performance we should have used a RAID 0 configuration, since the Azure disks are
|
||||
replicated.
|
||||
|
||||

|
||||
|
||||
### Recommended Cloud Server Configuration
|
||||
|
||||
We recommend `Standard DS13 v2 (8 cores, 56 GB memory)` servers with 1 TB managed SSD disks attached. To make the setup
|
||||
more cost effective, we can run on more affordable cloud instances, when we have lower demands in terms of throughput.
|
||||
The optimum is yet to be found. It is possible to upgrade or downgrade the nodes of the cluster, one node at a time.
|
||||
|
||||
Be prepared to kill and replace the slowest node of the cluster, especially in the cloud, since the Galera cluster will
|
||||
not perform better than the slowest node. The same goes for SAN disks. If you are unlucky and your disk has high
|
||||
latency, try replacing it with a new one. You may get better performance with the new disk.
|
||||
|
||||
### Disk upgrade using LVM
|
||||
|
||||
We recommend using LVM in production for convenience and flexibility. During our long running test we performed a hot
|
||||
disk upgrade using LVM.
|
@ -1,201 +0,0 @@
|
||||

|
||||
|
||||
# HA Notary Service Design
|
||||
|
||||
DOCUMENT MANAGEMENT
|
||||
---
|
||||
|
||||
## Document Control
|
||||
|
||||
| Title | HA Notary Design |
|
||||
| -------------------- | ---------------------------------------- |
|
||||
| Date | 13 December 2017 |
|
||||
| Author | Thomas Schroeter and Andrius Dagys |
|
||||
| Distribution | Design Review Board, Product Management, Services - Technical (Consulting), Platform Delivery |
|
||||
| Corda target version | Enterprise |
|
||||
| JIRA reference | https://r3-cev.atlassian.net/browse/ENT-1232 |
|
||||
|
||||
## Approvals
|
||||
|
||||
#### Document Sign-off
|
||||
|
||||
| Author | |
|
||||
| ----------------- | ---------------------------------------- |
|
||||
| Reviewer(s) | (GitHub PR reviewers) |
|
||||
| Final approver(s) | (GitHub PR approver(s) from Design Approval Board) |
|
||||
|
||||
#### Design Decisions
|
||||
|
||||
| Description | Recommendation |
|
||||
| ---------------------------------------- | -------------- |
|
||||
| [Galera or Permazen](decisions/decision.md) | Galera |
|
||||
|
||||
HIGH LEVEL DESIGN
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The distributed notary service tracks spent contract states and prevents double spending. For high-availability (HA), the backing data store is replicated across a cluster of machines in different data centers. In this model, the cluster is meant to be operated by a single party, and only crash faults are tolerated.
|
||||
|
||||
## Background
|
||||
|
||||
We have an existing HA notary service based on Atomix CopyCat, which is an open source state machine replication library that implements the Raft consensus algorithm. However, it doesn't scale well with the number of spent input states, since CopyCat takes periodic snapshots of the state machine and the snapshots have to fit in memory.
|
||||
|
||||
As an alternative, we propose using a more traditional MySQL database-based approach, using Galera Cluster, which provides synchronous multi-master replication. Galera Cluster is based on a MySQL server with Write-Set replication (wsrep) API, and the Galera Replication Plugin. Through the wsrep API Galera provides [certification-based replication](http://galeracluster.com/documentation-webpages/certificationbasedreplication.html). It works roughly as follows:
|
||||
1. A single database node executes a transaction optimistically until it reaches the commit point.
|
||||
2. Changes made by the transaction are collected into a write-set.
|
||||
3. The write-set is broadcast to the cluster.
|
||||
4. Every other node determines whether it can apply the write-set without conflicts.
|
||||
5. In case of conflict, the initial node rolls back the transaction.
|
||||
|
||||
There are different Galera Cluster implementations, and we chose the Percona XtraDB cluster, as they were historically more focused on performance than the competition.
|
||||
|
||||
### Decisions
|
||||
|
||||
- We are replacing the Atomix CopyCat Raft service.
|
||||
- We are using a Percona cluster for Corda Connect.
|
||||
- We keep investigating a more scalable solution, based on Permazen or a custom implementation.
|
||||
- In the long term, we are interested in providing a BFT solution, perhaps leveraging SGX.
|
||||
|
||||
#### Advantages of Percona
|
||||
|
||||
- Production ready
|
||||
- Works out of the box
|
||||
- Backed by a company, enterprise and a community support are available
|
||||
- Running stable at 30 tx/second (with 10 input states / tx), see figure below, in the section about the long running test
|
||||
|
||||
#### Disadvantages of Percona
|
||||
|
||||
- Performance deteriorates over time. This happens because Galera only works with the InnoDB storage engine, internally backed by a B+ tree. Since we use state references as primary keys, table inserts result in random B+ tree inserts, which doesn't scale well.
|
||||
|
||||
## Scope
|
||||
|
||||
### Goals
|
||||
|
||||
* We need a stable notary implementation.
|
||||
* The implementation has to be easy to operate.
|
||||
* We know that the switching costs to a more scalable solution are minimal.
|
||||
* We take periodic backups of the consumed states and we test the recovery.
|
||||
* We remain flexible and open to future requirements.
|
||||
|
||||
### Non-Goals
|
||||
|
||||
* For the time being, we don't need a solution that is shardable (for now, all replicas can hold all the state).
|
||||
* We don't require a solution that can handle throughput beyond 15 tx/second.
|
||||
* We don't design and implement a custom solution in the short term.
|
||||
* We don't need rate limiting and fairness.
|
||||
|
||||
## Design Decisions
|
||||
|
||||

|
||||
|
||||
The HA notary service relies on the underlying MySQL uniqueness provider on top of a Percona XtraDB Cluster to prevent double spending of input states. The exact data center locations are to be determined. Our notary service replicas connect via JDBC to the replicated MySQL service.
|
||||
|
||||
Percona XtraDB Cluster is based on Percona Server and the Galera replication library that provides a multi master cluster based on synchronous replication. The cluster is as good as its slowest node.
|
||||
|
||||
TECHNICAL DETAILS
|
||||
---
|
||||
|
||||
## Main Data Structure
|
||||
|
||||
The table below details the database schema.
|
||||
|
||||
| Field name | Type | Description |
|
||||
| --------------------- | ------------ | ---------------------------------------- |
|
||||
| issue_tx_id | Binary(32) | The ID of the transaction that created the state |
|
||||
| issue_tx_output_id | Int unsigned | Where in the transaction the state was created |
|
||||
| consuming_tx_id | Binary(32) | The ID of the transaction that consumes the input state |
|
||||
| consuming_tx_input_id | Int unsigned | Where in the transaction the state is consumed |
|
||||
| consuming_party | Blob | Who is requesting the notarisation (~1 kByte) |
|
||||
| commit_time | Timestamp | When this row is committed |
|
||||
|
||||
## Functional
|
||||
|
||||
The notary service relies on the MySQL uniqueness provider to prevent double spending. The MySQL database holds a single table as described above. For HA, the data is synchronously replicated to several nodes by the Galera replication plugin.
|
||||
|
||||
During notarisation, the uniqueness provider attempts to commit all input states of the Corda transaction in a single database transaction. If at least one input state has been previously spent, the entire database transaction fails with a batch exception. Unspent states can still be spent in a different later transaction. In case of double spend attempts, the uniqueness provider queries the database for details where the conflicting states have been spent. The consuming transaction ID, position of the input in the transaction and the requesting party are collected for all conflicting inputs, wrapped in a uniqueness exception, thrown by the uniqueness provider. This exception is handled by the notary service and turned into a notary exception.
|
||||
|
||||
We are using the Hikari connection pool to connect the notary services to all nodes of our Percona cluster. The connection pool can be monitored via JMX.
|
||||
|
||||
### Deployment
|
||||
|
||||
We are planning to run a five node Percona cluster that can tolerate two simultaneous node failures. In case we need to provide more storage or upgrade to better hardware we can take a single node down for maintenance and still tolerate one unplanned failure.
|
||||
|
||||
#### Monitoring cluster membership changes
|
||||
|
||||
We setup a [notification command](http://galeracluster.com/documentation-webpages/notificationcmd.html) that gets called whenever the node registers a change.
|
||||
|
||||
### Management
|
||||
|
||||
#### Disaster Recovery
|
||||
|
||||
Our disaster recovery strategy covers the following risks:
|
||||
1. **Host Failure**. For the 5 node cluster we can tolerate 2 host failures without interrupting operation. This includes both machine and disk failures.
|
||||
2. **DC Failure**. The cluster will be distributed across 3 data centers in a 2+2+1 configuration. A loss of one data center can be tolerated without interrupting service operation.
|
||||
3. **Data Corruption/Loss**. In cases of data corruption or loss that is replicated across the cluster (for example, accidental data deletion or modification by an administrator) backups will be used to restore the cluster state. In this scenario service downtime will be incurred.
|
||||
|
||||
##### Backup and Recovery
|
||||
|
||||
Recovery Point Objective: 0
|
||||
|
||||
Recovery Time Objective: 1h
|
||||
|
||||
Any data loss incurred by the notary service will lead to a compromised ledger, since participants would be able to double-spend already notarised states. Note that the backup & recovery procedure is only required for mitigating data loss that gets replicated to the entire cluster.
|
||||
|
||||
This can be achieved by combining periodic backups of the entire database state, and the MySQL [binary log](https://dev.mysql.com/doc/refman/5.7/en/binary-log.html). The binary log contains a log of all executed SQL statements, which can be replayed onto a backup to restore the most up-to-date state. In case of an accidental statement that removes data (e.g. DROP TABLE), the binary log can be replayed only up to the offending statement.
|
||||
|
||||
Scenarios where data corruption is caused by a malicious administrator selectively modifying or removing table rows are out of scope.
|
||||
|
||||
See [Galera's backup documentation](http://galeracluster.com/documentation-webpages/backingupthecluster.html)
|
||||
|
||||
#### Monitoring
|
||||
|
||||
See the [Percona Management and Monitoring](https://www.percona.com/doc/percona-monitoring-and-management/index.html) documentation.
|
||||
|
||||
* Throughput in Tx / second
|
||||
* Throughput in Input states / second
|
||||
* Double spend attempts / time
|
||||
* High level statistics, e.g. number of double spend attempts in the last 24 hours by two parties
|
||||
* Double spend attempts per party
|
||||
* Latency p50, p99
|
||||
* Number of input states in DB
|
||||
* Size of DB
|
||||
* Replication Queues, see [monitoring Galera](http://galeracluster.com/documentation-webpages/monitoringthecluster.html)
|
||||
|
||||
#### Alerting
|
||||
|
||||
Alerts are triggered based on relevant metrics, like number of active members in the cluster and size of write queues of individual nodes. We are configuring PMM to forward alerts to PagerDuty, where we do the routing to the operators who are on call. We configure email alerting and slack integration as additional channels.
|
||||
|
||||
## Security
|
||||
|
||||
SSL encrypted links between the nodes of the Galera cluster and the notary service and the Galera cluster. See the [SSL config documentation](http://galeracluster.com/documentation-webpages/sslconfig.html).
|
||||
|
||||
The managed disks on Azure [are encrypted](https://azure.microsoft.com/en-gb/blog/azure-managed-disks-sse/) with keys managed by Microsoft. We have to trust our cloud provider anyways, so we don't do our own disk encryption.
|
||||
|
||||
## Testing the throughput of the uniqueness provider
|
||||
|
||||
We are using a custom load test flow that includes double spend attempts. The application metrics are forwarded to Graphite and our Percona cluster is monitored by Percona's metrics and monitoring tool (PMM).
|
||||
|
||||
In our tests, the number of input states is Poisson-distributed with an average four input states per transaction. To increase throughput in terms of notarised input states per second, we could batch transactions in the future. We tested batching with batch sizes of up to 1000 input states per batch. And reached a throughput of 2k input states / second for batch sizes 250-1000. When we detect a double spend attempt, we could send through individual transactions to find the source of the conflict or bisect the batch.
|
||||
|
||||
## Long running test
|
||||
|
||||

|
||||
|
||||
The figure above shows the throughput in transactions per second over four days, while writing to the cluster with up to three clients. The dips occur while we take nodes off-line to simulate failure and to upgrade the disks. In the last phase of the test all nodes were equipped with managed 1TB SSDs and the cluster notarised at more than 300 input states per second while holding more than 100 M input states in the DB.
|
||||
|
||||
Glitches in throughput can occur when the write queue of a node is filling up. I'm assuming this is due to increased disk latency when the cloud SAN disk is busy with other operations. When the maximum write queue size is reached, the slow node isn't accepting writes any more and sends out flow control messages to its peers to stop replicating (I'm assuming this leads to messages being queued in their send queue). The queue sizes are monitored by the PMM tool and we can setup alerts based on a configured maximum write queue size or when we see "flow control messages".
|
||||
|
||||
We found that managed SSDs of 1TB in size performed better than a RAID 10 array of four 128GB SSDs. The latency of the 1TB SSDs was stable around 8ms, while we have observed latency spikes up to 64ms on the smaller SSDs. The disk load on the slowest node in terms of disk latency was around 6-8 outstanding writes during the last phase of the test. Setting up a RAID 10 was a mistake, for best performance we should have used a RAID 0 configuration, since the Azure disks are replicated.
|
||||
|
||||

|
||||
|
||||
### Recommended Cloud Server Configuration
|
||||
|
||||
We recommend `Standard DS13 v2 (8 cores, 56 GB memory)` servers with 1 TB managed SSD disks attached. To make the setup more cost effective, we can run on more affordable cloud instances, when we have lower demands in terms of throughput. The optimum is yet to be found. It is possible to upgrade or downgrade the nodes of the cluster, one node at a time.
|
||||
|
||||
Be prepared to kill and replace the slowest node of the cluster, especially in the cloud, since the Galera cluster will not perform better than the slowest node. The same goes for SAN disks. If you are unlucky and your disk has high latency, try replacing it with a new one. Maybe your get better performance with your new disk.
|
||||
|
||||
### Disk upgrade using LVM
|
||||
|
||||
We recommend using LVM in production for convenience and flexibility. During our long running test we performed a hot disk upgrade using LVM.
76
docs/source/design/template/design.md
Normal file
@ -0,0 +1,76 @@
# Design doc template

## Overview

Please read the [Design Review Process](../design-review-process.md) before completing a design.

Each section of the document should be at the second level (two hashes at the start of a line).

This section should describe the desired change or feature, along with background on why it's needed and what problem
it solves.

An outcome of the design document should be an implementation plan that defines JIRA stories and tasks to be completed
to produce shippable, demonstrable, executable code.

Please complete and/or remove section headings as appropriate to the design being proposed. These are provided as
guidance and to structure the design in a consistent and coherent manner.

## Background

Description of existing solution (if any) and/or rationale for requirement.

* Reference(s) to discussions held elsewhere (slack, wiki, etc).
* Definitions, acronyms and abbreviations

## Goals

What's in scope to be solved.

## Non-goals

What won't be tackled as part of this design, either because it's not needed/wanted, or because it will be tackled later
as part of a separate design effort. Figuring out what you will *not* do is frequently a useful exercise.

## Timeline

* Is this a short, medium or long-term solution?
* Where short-term design, is this evolvable / extensible or stop-gap (eg. potentially throwaway)?

## Requirements

* Reference(s) to any of following:
* Captured Product Backlog JIRA entry
* Internal White Paper feature item and/or visionary feature
* Project related requirement (POC, RFP, Pilot, Prototype) from
* Internal Incubator / Accelerator project
* Direct from Customer, ISV, SI, Partner
* Use Cases
* Assumptions

## Design Decisions

List of design decisions identified in defining the target solution.

For each item, please complete the attached [Design Decision template](decisions/decision.html)

Use the ``.. toctree::`` feature to list out the design decision docs here (see the source of this file for an example).

.. toctree::
:maxdepth: 2

decisions/decision.md

## Design
Think about:

* Public API, backwards compatibility impact.
* UI requirements, if any. Illustrate with UI Mockups and/or wireframes.
* Data model & serialization impact and changes required.
* Infrastructure services: persistence (schemas), messaging.
* Impact on performance, scalability, high availability
* Versioning, upgradability, migration
* Management: audit, alerting, monitoring, backup/recovery, archiving
* Data privacy, authentication, access control
* Logging
* Testability
@ -30,18 +30,47 @@ We look forward to seeing what you can do with Corda!
.. _`download the PDF`: _static/corda-developer-site.pdf

.. toctree::
:caption: Development
:maxdepth: 1

quickstart-index.rst
key-concepts.rst
operations-guide.rst
building-a-cordapp-index.rst
corda-nodes-index.rst
corda-networks-index.rst
tutorials-index.rst
tools-index.rst
node-internals-index.rst
component-library-index.rst
release-process-index.rst
troubleshooting.rst
other-index.rst

.. toctree::
:caption: Operations
:maxdepth: 2

corda-nodes-index.rst
corda-networks-index.rst
azure-vm.rst
aws-vm.rst
loadtesting.rst

.. toctree::
:caption: Design docs
:maxdepth: 2

design/design-review-process.md
design/certificate-hierarchies/design.md
design/failure-detection-master-election/design.md
design/float/design.md
design/hadr/design.md
design/kafka-notary/design.md
design/monitoring-management/design.md
design/notary-service-ha/design.md

.. toctree::
:caption: Participate
:maxdepth: 2

release-process-index.rst
corda-repo-layout.rst
building-the-docs.rst
json.rst
@ -1,9 +0,0 @@
Other
=====

.. toctree::
:maxdepth: 1

corda-repo-layout
building-the-docs
json
@ -269,8 +269,8 @@ SecureHash
~~~~~~~~~~
A parameter of type ``SecureHash`` can be written as a hexadecimal string: ``F69A7626ACC27042FEEAE187E6BFF4CE666E6F318DC2B32BE9FAF87DF687930C``

OpaqueBytes
~~~~~~~~~~~
OpaqueBytes and SerializedBytes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A parameter of type ``OpaqueBytes`` can be provided as a string in Base64.
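
As a rough Kotlin illustration of the two string forms above (assuming Corda's `SecureHash.parse` and the `OpaqueBytes` constructor; the values are just examples):

```kotlin
import net.corda.core.crypto.SecureHash
import net.corda.core.utilities.OpaqueBytes
import java.util.Base64

fun main() {
    // A SecureHash parameter written as a hexadecimal string.
    val hash = SecureHash.parse("F69A7626ACC27042FEEAE187E6BFF4CE666E6F318DC2B32BE9FAF87DF687930C")

    // An OpaqueBytes parameter provided as a Base64 string.
    val bytes = OpaqueBytes(Base64.getDecoder().decode("Y29yZGE="))

    println("$hash wraps ${bytes.size} bytes")
}
```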
PublicKey and CompositeKey
@ -7,6 +7,3 @@ Tools
network-simulator
demobench
node-explorer
azure-vm
aws-vm
loadtesting
@ -91,6 +91,8 @@ class RpcExceptionHandlingProxy(private val delegate: SecureCordaRPCOps) : Corda

override fun acceptNewNetworkParameters(parametersHash: SecureHash) = wrap { delegate.acceptNewNetworkParameters(parametersHash) }

override fun killFlow(id: StateMachineRunId) = wrap { delegate.killFlow(id) }

override fun nodeInfo() = wrap(delegate::nodeInfo)

override fun notaryIdentities() = wrap(delegate::notaryIdentities)
@ -131,8 +133,6 @@ class RpcExceptionHandlingProxy(private val delegate: SecureCordaRPCOps) : Corda

override fun isFlowsDrainingModeEnabled() = wrap(delegate::isFlowsDrainingModeEnabled)

override fun killFlow(id: StateMachineRunId) = wrap { delegate.killFlow(id) }

override fun shutdown() = wrap(delegate::shutdown)

private fun <RESULT> wrap(call: () -> RESULT): RESULT {
@ -26,6 +26,7 @@ class NodeConfig(
val rpcAdminPort: Int,
val isNotary: Boolean,
val users: List<User>,
val devMode: Boolean = true,
val runMigration: Boolean = true,
val jarDirs: List<String> = emptyList()
) {
@ -52,6 +53,7 @@ class NodeConfig(
.withValue("database", valueFor(mapOf("runMigration" to runMigration)))
.withValue("useTestClock", valueFor(true))
.withValue("jarDirs", valueFor(jarDirs))
.withValue("devMode", valueFor(devMode))
return if (isNotary) {
config.withValue("notary", ConfigValueFactory.fromMap(mapOf("validating" to true)))
} else {