From 4b5a38796fcc443d4dd6ddb785d4419b7ea3be01 Mon Sep 17 00:00:00 2001 From: Dimos Raptis Date: Fri, 30 Aug 2019 09:24:58 +0100 Subject: [PATCH] [CORDA-3106] - Refine documentation around rpc reconnection (#5405) --- docs/source/clientrpc.rst | 283 +++++++++++++++++++++----------------- 1 file changed, 153 insertions(+), 130 deletions(-) diff --git a/docs/source/clientrpc.rst b/docs/source/clientrpc.rst index 899d9a5b60..f7453b2af9 100644 --- a/docs/source/clientrpc.rst +++ b/docs/source/clientrpc.rst @@ -61,18 +61,16 @@ can perform. Permissions are not required to interact with the node via the shel RPC users are created by adding them to the ``rpcUsers`` list in the node's ``node.conf`` file: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - rpcUsers=[ - { - username=exampleUser - password=examplePass - permissions=[] - }, - ... - ] + rpcUsers=[ + { + username=exampleUser + password=examplePass + permissions=[] + }, + ... + ] By default, RPC users are not permissioned to perform any RPC operations. @@ -81,80 +79,72 @@ Granting flow permissions You provide an RPC user with the permission to start a specific flow using the syntax ``StartFlow.``: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - rpcUsers=[ - { - username=exampleUser - password=examplePass - permissions=[ - "StartFlow.net.corda.flows.ExampleFlow1", - "StartFlow.net.corda.flows.ExampleFlow2" - ] - }, - ... - ] + rpcUsers=[ + { + username=exampleUser + password=examplePass + permissions=[ + "StartFlow.net.corda.flows.ExampleFlow1", + "StartFlow.net.corda.flows.ExampleFlow2" + ] + }, + ... + ] You can also provide an RPC user with the permission to start any flow using the syntax ``InvokeRpc.startFlow``: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - rpcUsers=[ - { - username=exampleUser - password=examplePass - permissions=[ - "InvokeRpc.startFlow" - ] - }, - ... 
- ] + rpcUsers=[ + { + username=exampleUser + password=examplePass + permissions=[ + "InvokeRpc.startFlow" + ] + }, + ... + ] Granting other RPC permissions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You provide an RPC user with the permission to perform a specific RPC operation using the syntax ``InvokeRpc.``: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - rpcUsers=[ - { - username=exampleUser - password=examplePass - permissions=[ - "InvokeRpc.nodeInfo", - "InvokeRpc.networkMapSnapshot" - ] - }, - ... - ] + rpcUsers=[ + { + username=exampleUser + password=examplePass + permissions=[ + "InvokeRpc.nodeInfo", + "InvokeRpc.networkMapSnapshot" + ] + }, + ... + ] Granting all permissions ~~~~~~~~~~~~~~~~~~~~~~~~ You can provide an RPC user with the permission to perform any RPC operation (including starting any flow) using the ``ALL`` permission: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - rpcUsers=[ - { - username=exampleUser - password=examplePass - permissions=[ - "ALL" - ] - }, - ... - ] + rpcUsers=[ + { + username=exampleUser + password=examplePass + permissions=[ + "ALL" + ] + }, + ... + ] .. _rpc_security_mgmt_ref: @@ -175,53 +165,49 @@ These features are controlled by a set of options nested in the ``security`` fie The following example shows how to configure retrieval of users credentials and permissions from a remote database with passwords in hash-encrypted format and enable in-memory caching of users data: -.. container:: codeset +.. sourcecode:: groovy - .. 
sourcecode:: groovy - - security = { - authService = { - dataSource = { - type = "DB" - passwordEncryption = "SHIRO_1_CRYPT" - connection = { - jdbcUrl = "" - username = "" - password = "" - driverClassName = "" - } - } - options = { - cache = { - expireAfterSecs = 120 - maxEntries = 10000 - } + security = { + authService = { + dataSource = { + type = "DB" + passwordEncryption = "SHIRO_1_CRYPT" + connection = { + jdbcUrl = "" + username = "" + password = "" + driverClassName = "" } } + options = { + cache = { + expireAfterSecs = 120 + maxEntries = 10000 + } + } } + } It is also possible to have a static list of users embedded in the ``security`` structure by specifying a ``dataSource`` of ``INMEMORY`` type: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - security = { - authService = { - dataSource = { - type = "INMEMORY" - users = [ - { - username = "" - password = "" - permissions = ["", "", ...] - }, - ... - ] - } + security = { + authService = { + dataSource = { + type = "INMEMORY" + users = [ + { + username = "" + password = "" + permissions = ["", "", ...] + }, + ... + ] } } + } .. warning:: A valid configuration cannot specify both the ``rpcUsers`` and ``security`` fields. Doing so will trigger an exception at node startup. @@ -253,11 +239,9 @@ Password encryption Storing passwords in plain text is discouraged in applications where security is critical. Passwords are assumed to be in plain format by default, unless a different format is specified by the ``passwordEncryption`` field, like: -.. container:: codeset +.. sourcecode:: groovy - .. 
sourcecode:: groovy - - passwordEncryption = SHIRO_1_CRYPT + passwordEncryption = SHIRO_1_CRYPT ``SHIRO_1_CRYPT`` identifies the `Apache Shiro fully reversible Modular Crypt Format `_, @@ -272,16 +256,14 @@ performances in some cases, with the disadvantage of causing a (controllable) de Caching is disabled by default, it can be enabled by defining the ``options.cache`` field in ``security.authService``, for example: -.. container:: codeset +.. sourcecode:: groovy - .. sourcecode:: groovy - - options = { - cache = { - expireAfterSecs = 120 - maxEntries = 10000 - } - } + options = { + cache = { + expireAfterSecs = 120 + maxEntries = 10000 + } + } This will enable a non-persistent cache contained in the node's memory with maximum number of entries set to ``maxEntries`` where entries are expired and refreshed after ``expireAfterSecs`` seconds. @@ -356,28 +338,42 @@ Reconnecting RPC clients ------------------------ In the current version of Corda, an RPC client connected to a node stops functioning when the node becomes unavailable or the associated TCP connection is interrupted. -Running RPC commands against a stopped node will just throw exceptions. Any subscriptions to ``Observable``\s that have been created before the disconnection will stop receiving events after the node restarts. -RPCs which have a side effect, such as starting flows, may or may not have executed on the node depending on when the client was disconnected. +Running RPC commands after this has happened will just throw exceptions. Any subscriptions to ``Observable``\s that have been created before the disconnection will stop receiving events after the connection is re-established. +RPC calls that have a side effect, such as starting flows, may or may not have executed on the node depending on when the client was disconnected. -It is the client's responsibility to handle these errors and reconnect once the node is running again. 
The client will have to re-subscribe to any ``Observable``\s in order to keep receiving updates. -With regards to RPCs with side effects, the client will have to inspect the state of the node to infer whether the flow was executed or not before retrying it. +It is the responsibility of application code to handle these errors and reconnect once the node is running again. The client will have to retrieve new ``Observable``\s and re-subscribe to them in order to keep receiving updates. +With regard to RPCs with side effects (e.g. flow invocations), the application code will have to inspect the state of the node to infer whether the call was executed on the server side (e.g. if the flow was executed or not) before retrying it. -Clients can make use of the options described below in order to take advantage of some automatic reconnection functionality that mitigates some of these issues. +You can make use of the options described below in order to take advantage of some automatic reconnection functionality that mitigates some of these issues. Enabling automatic reconnection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you provide a list of addresses via the ``haAddressPool`` argument when instantiating a ``CordaRPCClient``, then automatic reconnection will be performed when the existing connection is dropped. -However, any in-flight calls during reconnection will fail and previously returned observables will call ``onError``. The client code is responsible for waiting for the connection to be established -in order to retry any calls, retrieve new observables and re-subscribe to them. +However, the application code is responsible for waiting for the connection to be established again in order to perform any calls, retrieve new observables and re-subscribe to them. +This can be done by performing a simple, side-effect-free RPC call (e.g. ``nodeInfo``). + +.. 
note:: Any RPC calls that had not been acknowledged to the RPC client by the node at the point the disconnection happened will fail with a ``ConnectionFailureException``. + It is important to note this does not mean the node did not execute the RPC calls; it only means the completion was not acknowledged. As described above, application code will have to check after the connection is re-established to determine whether these calls were actually executed. + Any observables that were returned before the disconnection will call the ``onError`` handlers. Enabling graceful reconnection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A more graceful form of reconnection is also available, which will block all in-flight calls until the connection is re-established and -will also reconnect the existing ``Observable``\s, so that they keep emitting events to the existing subscribers. +A more graceful form of reconnection is also available. This will: -.. warning:: In this approach, some events might be lost during a re-connection and not sent from the subscribed ``Observable``\s. +- reconnect any existing ``Observable``\s after a reconnection, so that they keep emitting events to the existing subscriptions. +- block any RPC calls that arrive during a reconnection, or that were not acknowledged at the point of reconnection, and execute them after the connection is re-established. + +More specifically, the behaviour in the second case is a bit more subtle: + +- Any RPC calls that do not have any side-effects (e.g. ``nodeInfo``) will be retried automatically across reconnections. + This works transparently for application code, which will not be able to determine whether there was a reconnection. + These RPC calls will remain blocked during a reconnection and will return successfully after the connection has been re-established. +- Any RPC calls that do have side-effects, such as the ones invoking flows (e.g. 
``startFlow``), will not be retried and will fail with ``CouldNotStartFlowException``. + This is done in order to avoid duplicate invocations of a flow, thus providing at-most-once guarantees. Application code is responsible for determining whether the flow needs to be retried and retrying it, if needed. + +.. warning:: In this approach, some events might be lost during a reconnection and not sent from the subscribed ``Observable``\s. You can enable this graceful form of reconnection by using the ``gracefulReconnect`` parameter in the following way: @@ -386,15 +382,42 @@ You can enable this graceful form of reconnection by using the ``gracefulReconne val cordaClient = CordaRPCClient(nodeRpcAddress) val cordaRpcOps = cordaClient.start(rpcUserName, rpcUserPassword, gracefulReconnect = true).proxy -Logical retries for flow invocation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Retrying flow invocations +~~~~~~~~~~~~~~~~~~~~~~~~~ -As described above, if you want to retry a flow that failed during a disconnection, you will first need to verify it has not been previously executed. -The only way currently to confirm this is by performing a business-level query. +As implied above, when graceful reconnection is enabled, flow invocations will not be retried across reconnections to avoid duplicate invocations. +Instead, the application code can retry a flow after checking whether it was previously triggered, by inspecting whether its side-effects have taken place. +A simplified skeleton of such code could look like the following: -.. note:: Future releases of Corda are expected to contain new APIs for coping with reconnection in a more resilient way providing stricter - safety guarantees. +.. container:: codeset + .. sourcecode:: kotlin + + fun runFlowWithRetries(client: CordaRPCOps) { + try { + client.startFlowDynamic(...) + } catch (exception: CouldNotStartFlowException) { + if (!wasFlowTriggered()) { + runFlowWithRetries(client) + } + } + } + + .. 
sourcecode:: java + + void runFlowWithRetries(CordaRPCOps client) { + try { + client.startFlowDynamic(...); + } catch (CouldNotStartFlowException exception) { + if (!wasFlowTriggered()) { + runFlowWithRetries(client); + } + } + } + +The logic of the ``wasFlowTriggered()`` function is naturally dependent on the flow logic, so it can differ per use-case. + +.. warning:: This approach provides at-least-once guarantees. It cannot provide exactly-once guarantees, because of race conditions between the moment the check is performed and the moment the side-effects of the flow become visible. Wire security -------------