mirror of
https://github.com/corda/corda.git
synced 2025-02-18 16:40:55 +00:00
First section of Corda technical whitepaper
This commit is contained in:
parent
0d3e35c5f5
commit
180c9acaff
@ -31,6 +31,7 @@
|
|||||||
howpublished = "{\url{http://ec.europa.eu/finance/financial-markets/settlement/index_en.htm}}",
|
howpublished = "{\url{http://ec.europa.eu/finance/financial-markets/settlement/index_en.htm}}",
|
||||||
year = 1998
|
year = 1998
|
||||||
}
|
}
|
||||||
|
|
||||||
@misc{Bitcoin,
|
@misc{Bitcoin,
|
||||||
title = "\emph{{Bitcoin: A Peer-to-Peer Electronic Cash System}}",
|
title = "\emph{{Bitcoin: A Peer-to-Peer Electronic Cash System}}",
|
||||||
author = "{{Nakamoto}}",
|
author = "{{Nakamoto}}",
|
||||||
@ -65,3 +66,74 @@
|
|||||||
howpublished = "{\url{http://arxiv.org/abs/1608.00771}}",
|
howpublished = "{\url{http://arxiv.org/abs/1608.00771}}",
|
||||||
year = 2016
|
year = 2016
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@misc{CordaIntro,
|
||||||
|
title = "\emph{{Corda: An introduction}}",
|
||||||
|
author = "{{Brown, Carlyle, Grigg, Hearn}}",
|
||||||
|
howpublished = "{\url{http://r3cev.com/s/corda-introductory-whitepaper-final.pdf}}",
|
||||||
|
year = 2016
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{PaymentChannels,
|
||||||
|
title = "Bitcoin micropayment channels",
|
||||||
|
author = "{{Mike Hearn}}",
|
||||||
|
howpublished = "{\url{https://bitcoinj.github.io/working-with-micropayments}}",
|
||||||
|
year = 2014
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{BIP70,
|
||||||
|
title = "Bitcoin payment protocol",
|
||||||
|
author = "{{Mike Hearn, Gavin Andresen}}",
|
||||||
|
howpublished = "{\url{https://github.com/bitcoin/bips/blob/master/bip-0070.mediawiki}}",
|
||||||
|
year = 2013
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{HBBFT,
|
||||||
|
author = {Andrew Miller and Yu Xia and Kyle Croman and Elaine Shi and Dawn Song},
|
||||||
|
title = "{{The Honey Badger of BFT Protocols}}",
|
||||||
|
howpublished = {Cryptology ePrint Archive, Report 2016/199},
|
||||||
|
year = 2016,
|
||||||
|
note = {\url{http://eprint.iacr.org/2016/199}},
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{ILPCC,
|
||||||
|
author = {Stefan Thomas},
|
||||||
|
title = "Crypto-Conditions",
|
||||||
|
howpublished = {\url{https://interledger.org/five-bells-condition/spec.html}},
|
||||||
|
year = 2016
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{AMQP,
|
||||||
|
added-at = {2013-01-14T13:56:18.000+0100},
|
||||||
|
author = {OASIS},
|
||||||
|
biburl = {http://www.bibsonomy.org/bibtex/2b0b0d48cc2254e62b9b15110aa84ea95/flrnb},
|
||||||
|
editor = {Godfrey, Robert and Ingham, David and Schloming, Rafael},
|
||||||
|
interhash = {abed552c6901a1a994e1e99f55e6a0f5},
|
||||||
|
intrahash = {b0b0d48cc2254e62b9b15110aa84ea95},
|
||||||
|
keywords = {amqp},
|
||||||
|
timestamp = {2013-01-14T13:56:18.000+0100},
|
||||||
|
title = {Advanced Message Queuing Protocol (AMQP) Version 1.0},
|
||||||
|
url = {http://docs.oasis-open.org/amqp/core/v1.0/amqp-core-complete-v1.0.pdf},
|
||||||
|
year = 2012
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{BigTable,
|
||||||
|
author = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
|
||||||
|
title = {Bigtable: A Distributed Storage System for Structured Data},
|
||||||
|
journal = {ACM Trans. Comput. Syst.},
|
||||||
|
issue_date = {June 2008},
|
||||||
|
volume = {26},
|
||||||
|
number = {2},
|
||||||
|
month = jun,
|
||||||
|
year = {2008},
|
||||||
|
issn = {0734-2071},
|
||||||
|
pages = {4:1--4:26},
|
||||||
|
articleno = {4},
|
||||||
|
numpages = {26},
|
||||||
|
url = {http://doi.acm.org/10.1145/1365815.1365816},
|
||||||
|
doi = {10.1145/1365815.1365816},
|
||||||
|
acmid = {1365816},
|
||||||
|
publisher = {ACM},
|
||||||
|
address = {New York, NY, USA},
|
||||||
|
keywords = {Large-Scale Distributed Storage},
|
||||||
|
}
|
363
docs/source/whitepaper/corda-technical-whitepaper.tex
Normal file
363
docs/source/whitepaper/corda-technical-whitepaper.tex
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
\documentclass{article}
|
||||||
|
\author{Mike Hearn}
|
||||||
|
\date{December, 2016}
|
||||||
|
\title{Corda: A distributed ledger}
|
||||||
|
%%\setlength{\parskip}{\baselineskip}
|
||||||
|
\usepackage{amsfonts}
|
||||||
|
\usepackage{listings}
|
||||||
|
\usepackage{color}
|
||||||
|
\usepackage{epigraph}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\graphicspath{ {images/} }
|
||||||
|
\usepackage[export]{adjustbox}
|
||||||
|
\usepackage{float}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage[super,comma,sort&compress]{natbib}
|
||||||
|
\usepackage[nottoc]{tocbibind}
|
||||||
|
\usepackage[parfill]{parskip}
|
||||||
|
\usepackage{textcomp}
|
||||||
|
%\usepackage[natbibapa]{apacite}
|
||||||
|
\renewcommand{\thefootnote}{\alph{footnote}}
|
||||||
|
|
||||||
|
%\epigraphfontsize{\small\itshape}
|
||||||
|
\setlength\epigraphwidth{4.5cm}
|
||||||
|
\setlength\epigraphrule{0pt}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\maketitle
|
||||||
|
%\epigraphfontsize{\small\itshape}
|
||||||
|
|
||||||
|
%\renewcommand{\abstractname}{An introduction}
|
||||||
|
\begin{center}
|
||||||
|
Version 1.0
|
||||||
|
|
||||||
|
\emph{Confidential: Pre-Publication Draft For R3 DLG}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
\vspace{10mm}
|
||||||
|
|
||||||
|
\begin{abstract}
|
||||||
|
|
||||||
|
A decentralised database with minimal trust between nodes would allow for the creation of a global ledger. Such a ledger
|
||||||
|
would not only be capable of implementing cryptocurrencies but also have many useful applications in finance, trade,
|
||||||
|
supply chain tracking and more. We present Corda, a decentralised global database, and describe in detail how it
|
||||||
|
achieves the goal of providing a robust and easy to use platform for decentralised app development. We elaborate on the
|
||||||
|
high level description provided in the paper \emph{Corda: An introduction}\cite{CordaIntro} and provide a detailed
|
||||||
|
technical overview, but assume no prior knowledge of the platform.
|
||||||
|
|
||||||
|
\end{abstract}
|
||||||
|
\newpage
|
||||||
|
\tableofcontents
|
||||||
|
\newpage
|
||||||
|
\section{Introduction}
|
||||||
|
|
||||||
|
In many industries significant effort is needed to keep organisation-specific databases in sync with each
|
||||||
|
other. In the financial sector the effort of keeping different databases synchronised, reconciling them to ensure
|
||||||
|
they actually are synchronised and resolving the `breaks' that occur when they are not represents a significant
|
||||||
|
fraction of the total work a bank actually does!
|
||||||
|
|
||||||
|
Why not just use a shared relational database? This would certainly solve a lot of problems with only existing technology,
|
||||||
|
but it would also raise more questions than answers:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Who would run this database? Where would we find a sufficient supply of angels to own it?
|
||||||
|
\item In which countries would it be hosted? What would stop that country abusing the mountain of sensitive information it would have?
|
||||||
|
\item What if it got hacked?
|
||||||
|
\item Can you actually scale a relational database to fit the entire financial system within it?
|
||||||
|
\item What happens if The Financial System\texttrademark~needs to go down for maintenance?
|
||||||
|
\item What kind of nightmarish IT bureaucracy would guard changes to the database schemas?
|
||||||
|
\item How would you manage access control?
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
We can imagine many other questions. A decentralised database attempts to answer them.
|
||||||
|
|
||||||
|
In this paper we differentiate between a \emph{decentralised} database and a \emph{distributed} database. A distributed
|
||||||
|
database like BigTable\cite{BigTable} scales to large datasets and transaction volumes by spreading the data over many
|
||||||
|
computers. However it is assumed that the computers in question are all run by a single homogeneous organisation and that
|
||||||
|
the nodes comprising the database all trust each other not to misbehave or leak data. In a decentralised database, such
|
||||||
|
as the one underpinning Bitcoin\cite{Bitcoin}, the nodes make much weaker trust assumptions and actively cross-check
|
||||||
|
each other's work. Such databases trade off performance and usability in order to gain security and global acceptance.
|
||||||
|
|
||||||
|
\emph{Corda} is a decentralised database platform with the following novel features:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item New transaction types can be defined using JVM\cite{JVM} bytecode.
|
||||||
|
\item Transactions may execute in parallel, on different nodes, without either node being aware of the other's transactions.
|
||||||
|
\item Nodes are arranged in an authenticated peer to peer network. All communication is direct.
|
||||||
|
\item There is no block chain\cite{Bitcoin}. Transaction races are deconflicted using pluggable \emph{notaries}. A single
|
||||||
|
Corda network may contain multiple notaries that provide their guarantees using a variety of different algorithms. Thus
|
||||||
|
Corda is not tied to any particular consensus algorithm.
|
||||||
|
\item Data is shared on a need-to-know basis. Nodes provide the dependency graph of a transaction they are sending to
|
||||||
|
another node on demand, but there is no global broadcast of \emph{all} transactions.
|
||||||
|
\item Bytecode-to-bytecode transpilation is used to allow complex, multi-step transaction building protocols called
|
||||||
|
\emph{flows} to be modelled as blocking code. The code is transformed into an asynchronous state machine, with
|
||||||
|
checkpoints written to the node's backing database when messages are sent and received. A node may potentially have
|
||||||
|
millions of flows active at once and they may last days, across node restarts and even upgrades. Flows expose progress
|
||||||
|
information to node administrators and users and may interact with people as well as other nodes.
|
||||||
|
\item The data model allows for arbitrary object graphs to be stored in the ledger. These graphs are called \emph{states} and are the atomic unit of data.
|
||||||
|
\item The platform provides a rich type system for the representation of things like dates, currencies, legal entities and so on.
|
||||||
|
\item States can declare a relational mapping and can be queried using SQL.
|
||||||
|
\item Integration with existing systems is considered from the start. The network can support rapid bulk data imports
|
||||||
|
from other database systems without placing load on the network. Global ledger data can be joined with existing,
|
||||||
|
internal RDBMS tables thanks to slots in the state definitions that are reserved for join keys. Events on the ledger
|
||||||
|
are exposed via an embedded JMS compatible message broker.
|
||||||
|
\item States can declare scheduled events. For example an interest rate swap state may declare fixing events.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Comparisons with Bitcoin and Ethereum will be provided throughout.
|
||||||
|
|
||||||
|
\newpage
|
||||||
|
|
||||||
|
\section{Overview}
|
||||||
|
|
||||||
|
Corda is a platform for the writing of ``CorDapps'': applications that extend the global database with new capabilities.
|
||||||
|
Such apps define new data types, new inter-node protocols and the ``smart contracts'' that determine allowed changes.
|
||||||
|
|
||||||
|
What is a smart contract? That depends on the model of computation we are talking about. There are two competing
|
||||||
|
computational models used in decentralised databases: the virtual computer model and the UTXO model.
|
||||||
|
|
||||||
|
\paragraph{Virtual computers.}In Ethereum the database is modelled as the memory of a shared global computer. This
|
||||||
|
computer's state is replicated across thousands of individual physical computers and has a single thread of execution,
|
||||||
|
defined by a Nakamoto block chain\cite{Bitcoin}. Inside this computer live a set of objects in the classical OOP sense:
|
||||||
|
code and data tightly bound together. Each object is identified by an address and these objects, called `contracts',
|
||||||
|
expose methods. These methods may be invoked either by other methods, or by initiating calls submitted by users of the
|
||||||
|
network via a transaction. In this model the term \emph{smart contract} can refer both to the code and sometimes a
|
||||||
|
particular instantiation of the code and data (an object). Access control is implemented by exposing the identity of the
|
||||||
|
caller to a method's implementation: this identity is derived from the signatures on a transaction. A transaction is
|
||||||
|
considered valid if the execution it triggers does not throw any exceptions.
|
||||||
|
|
||||||
|
\paragraph{UTXO model.}In Bitcoin the database can be viewed as a set of rows. Each row is identified by a
|
||||||
|
\texttt{(hash:number)} pair and contains two columns: \texttt{value}, specifying a quantity of bitcoin, and
|
||||||
|
\texttt{scriptPubKey} which contains a small bytecode program that validates any transaction that wishes to consume that
|
||||||
|
row. Rows cannot be changed: transactions can only delete rows and add new rows. Deleted rows are called transaction
|
||||||
|
`inputs' and added rows are called transaction `outputs'. The identity of a row is therefore derived from the hash of
|
||||||
|
the transaction that created it and the index of the output inside that transaction. In practice of course there is no
|
||||||
|
requirement to actually delete a row when a transaction consumes it, and the first versions of the Bitcoin software
|
||||||
|
simply marked them as spent. It was only later that the database was changed to physically delete used rows, in order to
|
||||||
|
improve performance. The bytecode programs typically check for the presence of a signature over the transaction from a
|
||||||
|
particular public key, hence the name \texttt{scriptPubKey}. However this is not required, and it's possible to craft
|
||||||
|
more complex conditions such as requiring a subset of a group of keys, or the presence of a password (note that a
|
||||||
|
password by itself would not be sufficient to secure access to the row). A transaction is considered valid if there is
|
||||||
|
at least one input, the inputs point to rows that are not yet spent and the data in the inputs satisfies the bytecode
|
||||||
|
programs for each row. A special kind of transaction, called a \emph{coinbase transaction}, is allowed to violate these
|
||||||
|
rules and may have inputs that don't refer to any existing row: such a transaction is allowed exactly once per block in
|
||||||
|
the block chain and as a result is only ever created by the miners who extend that chain.
|
||||||
|
|
||||||
|
It is vital to observe that the two data models use the term \emph{smart contract} to mean completely different
|
||||||
|
things. In the virtual computer model a smart contract is essentially like an object or class in object-oriented
|
||||||
|
programming: a group of mutable variables accessed via callable methods which can change those variables in arbitrary ways.
|
||||||
|
In the UTXO model a smart contract is a predicate: a function that yields either \texttt{true} or \texttt{false} and
|
||||||
|
which cannot change anything in the database by itself.
|
||||||
|
|
||||||
|
A deeper discussion of the tradeoffs between the different approaches can be found in a later section.
|
||||||
|
|
||||||
|
Corda uses the UTXO model and as a result its transactions are structurally similar to Bitcoin transactions: they have
|
||||||
|
inputs, outputs and signatures. Unlike Bitcoin, Corda database rows can contain arbitrary data, not just a value field.
|
||||||
|
Because the data consumed and added by transactions is not necessarily a set of key/value pairs, we don't talk about rows
|
||||||
|
but rather \emph{states}. Like Bitcoin, Corda states are associated with bytecode programs that must accept a transaction
|
||||||
|
for it to be valid, but unlike Bitcoin, a transaction must satisfy the programs for both the input and output states
|
||||||
|
at once. \emph{Issuance transactions} may append new states to the database without consuming any existing states but
|
||||||
|
unlike in Bitcoin these transactions are not special and may be created at any time, by anyone.
|
||||||
|
|
||||||
|
In contrast to both Bitcoin and Ethereum, Corda does not order transactions using a block chain and by implication does
|
||||||
|
not use miners or proof-of-work. Instead each state points to a \emph{notary}, which is a service that guarantees it
|
||||||
|
will sign a transaction only if all the input states are un-consumed. A transaction is not allowed to consume states
|
||||||
|
controlled by multiple notaries and thus there is never any need for two-phase commit between notaries. If a combination of
|
||||||
|
states would cross notaries then a special transaction type is used to move them onto a single notary first.
|
||||||
|
|
||||||
|
Notaries are expected to be composed of multiple mutually distrusting parties who use a Byzantine fault
|
||||||
|
tolerant algorithm like HoneyBadgerBFT\cite{HBBFT} to reach consensus. Notaries are identified by and sign with compound
|
||||||
|
public keys that conceptually follow the Interledger Crypto-Conditions specification\cite{ILPCC}. Note that whilst it
|
||||||
|
would be conventional to use a BFT algorithm for a notary service, there is no requirement to do so and in cases where
|
||||||
|
the legal system is sufficient to ensure protocol compliance a higher performance algorithm like RAFT may be used.
|
||||||
|
Because multiple notaries can co-exist a single network may provide a single global BFT notary for
|
||||||
|
general use and region-specific RAFT notaries for low latency trading within a unified regulatory area, for example
|
||||||
|
London or New York.
|
||||||
|
|
||||||
|
The Corda transaction format has various other features which are described in later sections.
|
||||||
|
|
||||||
|
\section{The peer to peer network}
|
||||||
|
|
||||||
|
\subsection{Network overview}
|
||||||
|
A Corda network consists of the following components:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Nodes, communicating using AMQP/1.0 over TLS. Nodes use a relational database for data storage.
|
||||||
|
\item A permissioning service that automates the process of provisioning TLS certificates.
|
||||||
|
\item A network map service that publishes information about nodes on the network.
|
||||||
|
\item One or more notary services. A notary may itself be distributed over multiple nodes.
|
||||||
|
\item Zero or more oracle services. An oracle is a well known service that signs transactions if they state a fact
|
||||||
|
and that fact is considered to be true. This is how the ledger can be connected to the real world, despite being
|
||||||
|
fully deterministic.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
A purely in-memory implementation of the messaging subsystem is provided which can inject simulated latency between
|
||||||
|
nodes and visualise communications between them. This can be useful for debugging, testing and educational purposes.
|
||||||
|
|
||||||
|
Oracles and notaries are covered in later sections.
|
||||||
|
|
||||||
|
\subsection{Identity and the permissioning service}
|
||||||
|
|
||||||
|
Unlike Bitcoin and Ethereum, Corda is designed for semi-private networks in which admission requires obtaining an
|
||||||
|
identity signed by a root authority. This assumption is pervasive -- the flow API provides messaging in terms of identities,
|
||||||
|
with routing and delivery to underlying nodes being handled automatically. There is no global broadcast at any point.
|
||||||
|
|
||||||
|
This `identity' does not have to be a legal or true identity. In the same way that an email address is a globally
|
||||||
|
unique pseudonym that is ultimately rooted by the top of the DNS hierarchy, so too can a Corda network work with
|
||||||
|
arbitrary self-selected usernames. The permissioning service can implement any policy it likes as long as the
|
||||||
|
identities it signs are globally unique. Thus an entirely anonymous Corda network is possible if a suitable
|
||||||
|
IP obfuscation system like Tor is also used.
|
||||||
|
|
||||||
|
Whilst simple string identities are likely sufficient for some networks, the financial industry typically requires some
|
||||||
|
level of \emph{know your customer} checking, and differentiation between different legal entities that may share
|
||||||
|
the same brand name. Corda reuses the standard PKIX infrastructure for connecting public keys to identities and thus
|
||||||
|
names are actually X.500 names. When a single string is sufficient the \emph{common name} field can be used alone,
|
||||||
|
similar to the web PKI. In more complex deployments the additional structure X.500 provides may be useful to
|
||||||
|
differentiate between entities with the same name. For example there are at least five different companies called
|
||||||
|
\emph{American Savings Bank} and in the past there may have been more than 40 independent banks with that name.
|
||||||
|
|
||||||
|
More complex notions of identity that may attest to many time-varying attributes are not handled at this layer of the
|
||||||
|
system: the base identity is always just an X.500 name. Note that even though messaging is always identified, transactions
|
||||||
|
themselves may still contain anonymous public keys.
|
||||||
|
|
||||||
|
\subsection{The network map}
|
||||||
|
|
||||||
|
Every network requires a network map service, which may itself be composed of multiple cooperating nodes. This is
|
||||||
|
similar to Tor's concept of \emph{directory authorities}. The network map publishes the IP addresses through which
|
||||||
|
every node on the network can be reached, along with the identity certificates of those nodes and the services they
|
||||||
|
provide. On receiving a connection nodes check that the connecting node is in the network map.
|
||||||
|
|
||||||
|
The network map abstracts the underlying IP addresses of the nodes from more useful business concepts like identities
|
||||||
|
and services. Each participant on the network, called a \emph{party}, publishes one or more IP addresses in the
|
||||||
|
network map. Equivalent domain names may be helpful for debugging but are not required. User interfaces and APIs
|
||||||
|
always work in terms of identities -- there is thus no equivalent to Bitcoin's notion of an address (hashed public key),
|
||||||
|
and user-facing applications rely on auto-completion and search rather than QR codes to identify a logical recipient.
|
||||||
|
|
||||||
|
It is possible to subscribe to network map changes and registering with the map is the first thing a node does at
|
||||||
|
startup. Nodes may optionally advertise their nearest city for load balancing and network visualisation purposes.
|
||||||
|
|
||||||
|
The map is a document that may be cached and distributed throughout the network. The map is therefore not required
|
||||||
|
to be highly available: if the map service becomes unreachable new nodes may not join the network and existing nodes
|
||||||
|
may not change their advertised service set, but otherwise things continue as normal.
|
||||||
|
|
||||||
|
\subsection{Message delivery}
|
||||||
|
|
||||||
|
The network is structurally similar to the email network. Nodes are expected to be long lived but may depart
|
||||||
|
temporarily due to crashes, connectivity interruptions or maintenance. Messages are written to disk
|
||||||
|
and delivery is retried until the remote node has acknowledged a message, at which point it is expected to have
|
||||||
|
either reliably stored the message or processed it completely. Connections between nodes are built and torn down as
|
||||||
|
needed: there is no assumption of constant connectivity. An ideal network would be entirely flat with high quality
|
||||||
|
connectivity between all nodes, but Corda recognises that this is not always compatible with common network
|
||||||
|
setups and thus the message routing component of a node can be separated from the rest and run outside the firewall.
|
||||||
|
In this way nodes that do not have duplex connectivity can still take part in the network as first class citizens.
|
||||||
|
Additionally a single node may have multiple advertised IP addresses.
|
||||||
|
|
||||||
|
The reference implementation provides this functionality using the Apache Artemis message broker, through which it
|
||||||
|
obtains journalling, load balancing, flow control, high availability clustering, streaming of messages too large to fit
|
||||||
|
in RAM and many other useful features. The network uses the \emph{AMQP/1.0}\cite{AMQP} protocol which is a widely
|
||||||
|
implemented binary messaging standard, combined with TLS to secure messages in transit and authenticate the endpoints.
|
||||||
|
|
||||||
|
\subsection{Serialization, sessioning, deduplication and signing}
|
||||||
|
|
||||||
|
All messages are encoded using a compact binary format. Each message has a UUID set in an AMQP header which is used
|
||||||
|
as a deduplication key, thus accidentally redelivered messages will be ignored.
|
||||||
|
|
||||||
|
% TODO: Describe the serialization format in more detail once finalised.
|
||||||
|
|
||||||
|
Messages may also have an associated organising 64-bit \emph{session ID}. Note that this is distinct from the AMQP
|
||||||
|
notion of a session. Sessions can be long lived and persist across node restarts and network outages. They exist in order
|
||||||
|
to group messages that are part of a \emph{flow}, described in more detail below.
|
||||||
|
|
||||||
|
Messages that are successfully processed by a node generate a signed acknowledgement message called a `receipt'. Note that
|
||||||
|
this is distinct from the unsigned acknowledgements that live at the AMQP level and which simply flag that a message was
|
||||||
|
successfully downloaded over the wire. A receipt may be generated some time after the message is processed in the case
|
||||||
|
where acknowledgements are being batched to amortise signing overhead, and the receipt identifies the message by the hash
|
||||||
|
of its content. The purpose of the receipts is to give a node undeniable evidence that a counterparty received a
|
||||||
|
notification that would stand up later in a dispute mediation process. Corda does not attempt to support deniable
|
||||||
|
messaging.
|
||||||
|
|
||||||
|
\newpage
|
||||||
|
\section{Flow framework}
|
||||||
|
|
||||||
|
It is common in decentralised ledger systems for complex multi-party protocols to be needed. The Bitcoin payment channel
|
||||||
|
protocol\cite{PaymentChannels} involves two parties putting money into a multi-signature pot, then iterating with each
|
||||||
|
other on a shared transaction that spends that pot, with extra transactions used for the case where one party or the
|
||||||
|
other fails to terminate properly. Such protocols typically involve reliable private message passing, checkpointing to
|
||||||
|
disk, signing of transactions, interaction with the p2p network, reporting progress to the user, maintaining a complex
|
||||||
|
state machine with timeouts and error cases, and possibly interaction with internal systems on either side. All
|
||||||
|
this can become quite involved. The implementation of Bitcoin payment channels in the bitcoinj library is approximately
|
||||||
|
9000 lines of Java, very little of which involves cryptography.
|
||||||
|
|
||||||
|
As another example, the core Bitcoin protocol only
|
||||||
|
allows you to append transactions to the ledger. Transmitting other information that might be useful such as a text message,
|
||||||
|
refund address, identity information and so on is not supported and must be handled in some other way -- typically by
|
||||||
|
wrapping the raw ledger transaction bytes in a larger message that adds the desired metadata and giving responsibility
|
||||||
|
for broadcasting the embedded transaction to the recipient, as in Bitcoin's BIP 70\cite{BIP70}.
|
||||||
|
|
||||||
|
In Corda transaction data is not globally broadcast. Instead it is transmitted to the relevant parties only when they
|
||||||
|
need to see it. Moreover even quite simple use cases -- like sending cash -- may involve a multi-step negotiation between
|
||||||
|
counterparties and the involvement of a third party such as a notary. Additional information that isn't put into the
|
||||||
|
ledger is considered essential, as opposed to nice-to-have. Thus unlike traditional blockchain systems in which the primary
|
||||||
|
form of communication is global broadcast, in Corda \emph{all} communication takes the form of small multi-party sub-protocols
|
||||||
|
called flows.
|
||||||
|
|
||||||
|
The flow framework presents a programming model that looks to the developer as if they have the ability to run millions
|
||||||
|
of long lived threads which can survive node restarts, and even node upgrades. APIs are provided to send and receive
|
||||||
|
object graphs to and from other identities on the network, embed sub-flows, and report progress to observers. In this
|
||||||
|
way business logic can be expressed at a very high level, with the details of making it reliable and efficient
|
||||||
|
abstracted away. This is achieved with the following components.
|
||||||
|
|
||||||
|
\paragraph{Just-in-time state machine compiler.}Code that is written in a blocking manner typically cannot be stopped
|
||||||
|
and transparently restarted later. The first time a flow's \texttt{call} method is invoked a bytecode-to-bytecode
|
||||||
|
transformation occurs that rewrites the classes into a form that implements a resumable state machine. These state
|
||||||
|
machines are sometimes called fibers or coroutines, and the transformation engine Corda uses is capable of rewriting
|
||||||
|
code arbitrarily deep in the stack on the fly. The developer may thus break their logic into multiple methods and
|
||||||
|
classes, use loops, and generally structure their program as if it were executing in a single blocking thread. There's only a
|
||||||
|
small list of things they should not do: sleeping, directly accessing the network APIs, or doing other tasks that might
|
||||||
|
block outside of the framework.
|
||||||
|
|
||||||
|
\paragraph{Transparent checkpointing.}When a flow wishes to wait for a message from another party (or input from a
|
||||||
|
human being) the underlying stack frames are suspended onto the heap, then crawled and serialized into the node's
|
||||||
|
underlying relational database using an object serialization framework. The written objects are prefixed with small
|
||||||
|
schema definitions that allow some measure of portability across changes to the layout of objects, although
|
||||||
|
portability across changes to the stack layout is left for future work. Flows are resumed and suspended on demand, meaning
|
||||||
|
it is feasible to have far more flows active at once than would fit in memory. The checkpointing process is atomic with
|
||||||
|
changes to local storage and acknowledgement of network messages.
|
||||||
|
|
||||||
|
\paragraph{Identity to IP address mapping.}Flows are written in terms of identities. The framework takes care of routing
|
||||||
|
messages to the right IP address for a given identity, following movements that may take place whilst the flow is active
|
||||||
|
and handling load balancing for multi-homed parties as appropriate.
|
||||||
|
|
||||||
|
\paragraph{A library of subflows.}Flows can invoke sub-flows, and a library of flows is provided to automate common tasks
|
||||||
|
like notarising a transaction or atomically swapping ownership of two assets.
|
||||||
|
|
||||||
|
\paragraph{Progress reporting.}Flows can provide a progress tracker that indicates which step they are up to. Steps can
|
||||||
|
have human-meaningful labels, along with other tagged data like a progress bar. Progress trackers are hierarchical and
|
||||||
|
steps can have sub-trackers for invoked sub-flows.
|
||||||
|
|
||||||
|
\paragraph{Flow hospital.}Flows can pause if they throw exceptions or explicitly request human assistance. A flow that
|
||||||
|
has stopped appears in the \emph{flow hospital} where the node's administrator may decide to kill the flow or provide it
|
||||||
|
with a solution. The ability to request manual solutions is useful for cases where the other side isn't sure why you
|
||||||
|
are contacting them, for example, the specified reason for sending a payment is not recognised, or when the asset used for
|
||||||
|
a payment is not considered acceptable.
|
||||||
|
|
||||||
|
% TODO: Event scheduling
|
||||||
|
% TODO: Data model: commands, attachments
|
||||||
|
% TODO: Cash and IOU modelling
|
||||||
|
% TODO: Notaries
|
||||||
|
% TODO: App platform and JVM sandboxing
|
||||||
|
% TODO: Client side signing devices
|
||||||
|
% TODO: Client RPC and reactive collections
|
||||||
|
% TODO: Integration with existing database systems
|
||||||
|
% TODO: Privacy techniques
|
||||||
|
% TODO: Experimental contract DSL?
|
||||||
|
|
||||||
|
\section{Conclusion}
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
\bibliographystyle{unsrt}
|
||||||
|
\bibliography{Ref}
|
||||||
|
|
||||||
|
\end{document}
|
Loading…
x
Reference in New Issue
Block a user