corda/docs/build/html/data-model.html



<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Data model &mdash; R3 Corda latest documentation</title>
  

    <link rel="stylesheet" href="_static/css/custom.css" type="text/css" />
  

    <link rel="top" title="R3 Corda latest documentation" href="index.html"/>
        <link rel="next" title="Transaction Data Types" href="transaction-data-types.html"/>
        <link rel="prev" title="Getting set up" href="getting-set-up.html"/> 

  
  <script src="_static/js/modernizr.min.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search">
          

            <a href="index.html" class="icon icon-home"> R3 Corda
          

          </a>

          
              <div class="version">
                latest
              </div>
            
          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
<br>
<a href="api/index.html">API reference</a>

        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
                <p class="caption"><span class="caption-text">Overview</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="inthebox.html">What&#8217;s included?</a></li>
<li class="toctree-l1"><a class="reference internal" href="getting-set-up.html">Getting set up</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Data model</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#overview">Overview</a></li>
<li class="toctree-l2"><a class="reference internal" href="#comparison-with-bitcoin">Comparison with Bitcoin</a></li>
<li class="toctree-l2"><a class="reference internal" href="#comparison-with-ethereum">Comparison with Ethereum</a></li>
<li class="toctree-l2"><a class="reference internal" href="#rationale-for-and-tradeoffs-in-adopting-a-utxo-style-model">Rationale for and tradeoffs in adopting a UTXO-style model</a></li>
<li class="toctree-l2"><a class="reference internal" href="#rationale">Rationale</a></li>
<li class="toctree-l2"><a class="reference internal" href="#pros">Pros</a></li>
<li class="toctree-l2"><a class="reference internal" href="#cons">Cons</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="transaction-data-types.html">Transaction Data Types</a></li>
<li class="toctree-l1"><a class="reference internal" href="consensus.html">Consensus Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="messaging.html">Networking and messaging</a></li>
<li class="toctree-l1"><a class="reference internal" href="running-the-demos.html">Running the demos</a></li>
<li class="toctree-l1"><a class="reference internal" href="node-administration.html">Node administration</a></li>
<li class="toctree-l1"><a class="reference internal" href="irs.html">The Interest Rate Swap Contract</a></li>
</ul>
<p class="caption"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="where-to-start.html">Where to start</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial-contract.html">Writing a contract</a></li>
<li class="toctree-l1"><a class="reference internal" href="protocol-state-machines.html">Protocol state machines</a></li>
<li class="toctree-l1"><a class="reference internal" href="oracles.html">Writing oracle services</a></li>
</ul>
<p class="caption"><span class="caption-text">Appendix</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="release-process.html">Release process</a></li>
<li class="toctree-l1"><a class="reference internal" href="release-process.html#steps-to-cut-a-release">Steps to cut a release</a></li>
<li class="toctree-l1"><a class="reference internal" href="release-notes.html">Release notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="visualiser.html">Using the visualiser</a></li>
<li class="toctree-l1"><a class="reference internal" href="codestyle.html">Code style guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="building-the-docs.html">Building the documentation</a></li>
</ul>

            
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="index.html">R3 Corda</a>
      </nav>


      <div class="wy-nav-content">
        <div class="rst-content">
          

<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="index.html">Docs</a> &raquo;</li>
      
    <li>Data model</li>
      <li class="wy-breadcrumbs-aside">
        
          
            <a href="_sources/data-model.txt" rel="nofollow"> View page source</a>
          
        
      </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="data-model">
<h1>Data model<a class="headerlink" href="#data-model" title="Permalink to this headline">¶</a></h1>
<p>This article covers the data model: how <em>states</em>, <em>transactions</em> and <em>code contracts</em> interact with each other and
how they are represented in the code. It doesn&#8217;t attempt to give detailed design rationales or information on future
design elements: please refer to the R3 wiki for background information.</p>
<div class="section" id="overview">
<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
<p>We begin with the idea of a global ledger. In our model, although the ledger is shared, it is not always the case that
transactions and ledger entries are globally visible. In cases where a set of transactions stays within a small subgroup of
users it should be possible to keep the relevant data purely within that group.</p>
<p>To ensure consistency in a global, shared system where not all data may be visible to all participants, we rely
heavily on secure hashes like SHA-256 to identify things. The ledger is defined as a set of immutable <strong>states</strong>, which
are created and destroyed by digitally signed <strong>transactions</strong>. Each transaction points to a set of states that it will
consume/destroy (these are called <strong>inputs</strong>) and contains a set of new states that it will create (these are called
<strong>outputs</strong>).</p>
<p>States contain arbitrary data, but they always contain at minimum a hash of the bytecode of a
<strong>contract code</strong> file, which is a program expressed in JVM byte code that runs sandboxed inside a Java virtual machine.
Contract code (or just &#8220;contracts&#8221; in the rest of this document) are globally shared pieces of business logic.</p>
<p>Contracts define a <strong>verify function</strong>, which is a pure function given the entire transaction as input. To be considered
valid, the transaction must be <strong>accepted</strong> by the verify function of every contract pointed to by the
input and output states.</p>
<p>Beyond inputs and outputs, transactions may also contain <strong>commands</strong>, small data packets that
the platform does not interpret itself, but which can parameterise execution of the contracts. They can be thought of as
arguments to the verify function. Each command has a list of <strong>public keys</strong> associated with it. The platform ensures
that the transaction is signed by every key listed in the commands before the contracts start to execute. Thus, a verify
function can trust that all listed keys have signed the transaction but is responsible for verifying that any keys required
for the transaction to be valid from the verify function&#8217;s perspective are included in the list. Public keys
may be random/identityless for privacy, or linked to a well known legal identity, for example via a
<em>public key infrastructure</em> (PKI).</p>
<p>Commands are always embedded inside a transaction. Sometimes, there&#8217;s a larger piece of data that can be reused across
many different transactions. For this use case, we have <strong>attachments</strong>. Every transaction can refer to zero or more
attachments by hash. Attachments are always ZIP/JAR files, which may contain arbitrary content. Contract code can then
access the attachments by opening them as a JarInputStream (this is temporary and will change later).</p>
<p>Note that there is nothing that explicitly binds together specific inputs, outputs, commands or attachments. Instead
it&#8217;s up to the contract code to interpret the pieces inside the transaction and ensure they fit together correctly. This
is done to maximise flexibility for the contract developer.</p>
<p>Transactions may sometimes need to provide a contract with data from the outside world. Examples may include stock
prices, facts about events or the statuses of legal entities (e.g. bankruptcy), and so on. The providers of such
facts are called <strong>oracles</strong> and they provide facts to the ledger by signing transactions that contain commands they
recognise, or by creating signed attachments. The commands contain the fact and the signature shows agreement to that fact.
Time is also modelled as a fact, with the signature of a special kind of oracle called a <strong>timestamping authority</strong> (TSA).
A TSA signs a transaction if a pre-defined timestamping command in it defines an after/before time window that includes
&#8220;true time&#8221; (i.e. GPS time as calibrated to the US Naval Observatory). An oracle may prefer to generate a signed
attachment if the fact it&#8217;s creating is relatively static and may be referred to over and over again.</p>
<p>As the same terminology often crops up in different distributed ledger designs, let&#8217;s compare this to other
distributed ledger systems you may be familiar with. You can find more detailed design rationales for why the platform
differs from existing systems in <a class="reference external" href="https://r3-cev.atlassian.net/wiki/display/AWG/Platform+Stream%3A+Corda">the R3 wiki</a>,
but to summarise, the driving factors are:</p>
<ul class="simple">
<li>Improved contract flexibility vs Bitcoin</li>
<li>Improved scalability vs Ethereum, as well as ability to keep parts of the transaction graph private (yet still uniquely addressable)</li>
<li>No reliance on proof of work</li>
<li>Re-use of existing sandboxing virtual machines</li>
<li>Use of type-safe, garbage-collected implementation languages</li>
<li>Simplified auditing</li>
</ul>
</div>
<div class="section" id="comparison-with-bitcoin">
<h2>Comparison with Bitcoin<a class="headerlink" href="#comparison-with-bitcoin" title="Permalink to this headline">¶</a></h2>
<p>Similarities:</p>
<ul class="simple">
<li>The basic notion of immutable states that are consumed and created by transactions is the same.</li>
<li>The notion of transactions having multiple inputs and outputs is the same. Bitcoin sometimes refers to the ledger
as the unspent transaction output set (UTXO set) as a result.</li>
<li>Like in Bitcoin, a contract is a pure function. Contracts do not have storage or the ability to interact with anything.
Given the same transaction, a contract&#8217;s verify function always yields exactly the same result.</li>
<li>Bitcoin output scripts are parameterised by the input scripts in the spending transaction. This is somewhat similar
to our notion of a <em>command</em>.</li>
<li>Bitcoin transactions, like ours, refer to the states they consume by using a (txhash, index) pair. The Bitcoin
protocol calls these &#8220;outpoints&#8221;. In our prototype code they are known as <code class="docutils literal"><span class="pre">StateRefs</span></code> but the concept is identical.</li>
<li>Bitcoin transactions have an associated timestamp (the time at which they are mined).</li>
</ul>
<p>Differences:</p>
<ul class="simple">
<li>A Bitcoin transaction has a single, rigid data format. A &#8220;state&#8221; in Bitcoin is always a (quantity of bitcoin, script)
pair and cannot hold any other data. Some people have been known to try and hack around this limitation by embedding
data in semi-standardised places in the contract code so the data can be extracted through pattern matching, but this
is a poor approach. Our states can include arbitrary typed data.</li>
<li>A Bitcoin transaction&#8217;s acceptance is controlled only by the contract code in the consumed input states. In practice
this has proved limiting. Our transactions invoke not only input contracts but also the contracts of the outputs.</li>
<li>A Bitcoin script can only be given a fixed set of byte arrays as the input. This means there&#8217;s no way for a contract
to examine the structure of the entire transaction, which severely limits what contracts can do.</li>
<li>Our contracts are Turing-complete and can be written in any ordinary programming language that targets the JVM.</li>
<li>Our transactions and contracts have to get their time from an attached timestamp rather than a block chain. This is
important given that we are currently considering block-free conflict resolution algorithms.</li>
<li>We use the term &#8220;contract&#8221; to refer to a bundle of business logic that may handle various different tasks, beyond
transaction verification. For instance, currently our contracts also include code for creating valid transactions
(this is often called &#8220;wallet code&#8221; in Bitcoin).</li>
</ul>
</div>
<div class="section" id="comparison-with-ethereum">
<h2>Comparison with Ethereum<a class="headerlink" href="#comparison-with-ethereum" title="Permalink to this headline">¶</a></h2>
<p>Similarities:</p>
<ul class="simple">
<li>Like Ethereum, code runs inside a relatively powerful virtual machine and can contain complex logic. Non-assembly
based programming languages can be used for contract programming.</li>
<li>They are both intended for the modelling of many different kinds of financial contract.</li>
</ul>
<p>Differences:</p>
<ul class="simple">
<li>The term &#8220;contract&#8221; in Ethereum refers to an <em>instantiation</em> of a program that is replicated and maintained by
every participating node. This instantiation is very much like an object in an OO program: it can receive and send
messages, update local storage and so on. In contrast, we use the term &#8220;contract&#8221; to refer to a set of functions, only
one of which is a part of keeping the system synchronised (the verify function). That function is pure and
stateless i.e. it may not interact with any other part of the system whilst executing.</li>
<li>There is no notion of an &#8220;account&#8221;, as there is in Ethereum.</li>
<li>As contracts don&#8217;t have any kind of mutable storage, there is no notion of a &#8220;message&#8221; as in Ethereum.</li>
<li>Ethereum claims to be a platform not only for financial logic, but literally any kind of application at all. Our
platform considers non-financial applications to be out of scope.</li>
</ul>
</div>
<div class="section" id="rationale-for-and-tradeoffs-in-adopting-a-utxo-style-model">
<h2>Rationale for and tradeoffs in adopting a UTXO-style model<a class="headerlink" href="#rationale-for-and-tradeoffs-in-adopting-a-utxo-style-model" title="Permalink to this headline">¶</a></h2>
<p>As discussed above, Corda uses the so-called &#8220;UTXO set&#8221; model (unspent transaction output). In this model, the database
does not track accounts or balances. Instead all database entries are immutable. An entry is either spent or not spent
but it cannot be changed. In Bitcoin, spentness is implemented simply as deletion – the inputs of an accepted transaction
are deleted and the outputs created.</p>
<p>This approach has some advantages and some disadvantages, which is why some platforms like Ethereum have tried
(or are trying) to abstract this choice away and support a more traditional account-like model.  We have explicitly
chosen <em>not</em> to do this and our decision to adopt a UTXO-style model is a deliberate one.  In the sections below,
the rationale for this decision and its pros and cons are outlined.</p>
</div>
<div class="section" id="rationale">
<h2>Rationale<a class="headerlink" href="#rationale" title="Permalink to this headline">¶</a></h2>
<p>Corda, in common with other blockchain-like platforms, is designed to bring parties to shared sets of data into
consensus as to the existence, content and allowable evolutions of those data sets. However, Corda is designed with the
explicit aim of avoiding, to the extent possible, the scalability and privacy implications that arise from those platforms&#8217;
decisions to adopt a global broadcast model.</p>
<p>Whilst the privacy implications of a global consensus model are easy to understand, the scalability implications are
perhaps more subtle, yet serious. In a consensus system, it is critical that all processors of a transaction reach
precisely the same conclusion as to its effects.  In situations where two transactions may act on the same data set,
it means that the two transactions must be processed in the same <em>order</em> by all nodes. If this were not the case then it
would be possible to devise situations where nodes processed transactions in different orders and reached different
conclusions as to the state of the system.  It is for this reason that systems like Ethereum effectively run
single-threaded, meaning the speed of the system is limited by the single-threaded performance of the slowest
machine on the network.</p>
<p>In Corda, we assume the data being processed represents financial agreements between identifiable parties and that these
institutions will adopt the system only if a significant number of such agreements can be managed by the platform.
As such, the system has to be able to support parallelisation of execution to the greatest extent possible,
whilst ensuring correct transaction ordering when two transactions seek to act on the same piece of shared state.</p>
<p>To achieve this, we must minimise the number of parties who need to receive and process copies of any given
transaction and we must minimise the extent to which two transactions seek to mutate (or supersede) any given piece
of shared state.</p>
<p>A key design decision, therefore, is what should be the most atomic unit of shared data in the system.  This decision
also has profound privacy implications: the more coarsely defined the shared data units, the larger the set of
actors who will likely have a stake in its accuracy and who must process and observe any update to it.</p>
<p>This becomes most obvious when we consider two models for representing cash balances and payments.</p>
<p>A simple account model for cash would define a data structure that maintained a balance at a particular bank for each
&#8220;account holder&#8221;. Every holder of a balance would need a copy of this structure and would thus need to process and
validate every payment transaction, learning about everybody else&#8217;s payments and balances in the process.
All payments across that set of accounts would have to be single-threaded across the platform, limiting maximum
throughput.</p>
<p>A more sophisticated example might create a data structure per account holder.
But, even here, I would leak my account balance to anybody to whom I ever made
a payment and I could only ever make one payment at a time, for the same reasons as above.</p>
<p>A UTXO model would define a data structure that represented an <em>instance</em> of a claim against the bank. An account
holder could hold <em>many</em> such instances, the aggregate of which would reveal their balance at that institution.  However,
the account holder now only needs to reveal to their payee those instances consumed in making a payment to that payee.
This also means the payer could make several payments in parallel.   A downside is that the model is harder to understand.
However, we consider the privacy and scalability advantages to overwhelm the modest additional cognitive load this places
on those attempting to learn the system.</p>
<p>In what follows, further advantages and disadvantages of this design decision are explored.</p>
</div>
<div class="section" id="pros">
<h2>Pros<a class="headerlink" href="#pros" title="Permalink to this headline">¶</a></h2>
<p>The UTXO model has these advantages:</p>
<ul class="simple">
<li>Immutable ledger entries give the usual advantages that a more functional approach brings: it&#8217;s easy to do analysis
on a static snapshot of the data and reason about the contents.</li>
<li>Because there are no accounts, it&#8217;s very easy to apply transactions in parallel even for high traffic legal entities
assuming sufficiently granular entries.</li>
<li>Transaction ordering becomes trivial: it is impossible to mis-order transactions due to the reliance on hash functions
to identify previous states. There is no need for sequence numbers or other things that are hard to provide in a
fully distributed system.</li>
<li>Conflict resolution boils down to the double spending problem, which places extremely minimal demands on consensus
algorithms (as the variable you&#8217;re trying to reach consensus on is a set of booleans).</li>
</ul>
</div>
<div class="section" id="cons">
<h2>Cons<a class="headerlink" href="#cons" title="Permalink to this headline">¶</a></h2>
<p>It also comes with some pretty serious complexities that in practice must be abstracted from developers:</p>
<ul class="simple">
<li>Representing numeric amounts using immutable entries is unnatural. For instance, if you receive $1000 and wish
to send someone $100, you have to consume the $1000 output and then create two more: a $100 for the recipient and
$900 back to yourself as change. The fact that this happens can leak private information to an observer.</li>
<li>Because users do need to think in terms of balances and statements, you have to layer this on top of the
underlying ledger: you can&#8217;t just read someone&#8217;s balance out of the system. Hence, the &#8220;wallet&#8221; / position manager.
Experience from those who have developed wallets for Bitcoin and other systems is that they can be complex pieces of code,
although the bulk of wallets&#8217; complexity in public systems is handling the lack of finality (and key management).</li>
<li>Whilst transactions can be applied in parallel, it is much harder to create them in parallel due to the need to
strictly enforce a total ordering.</li>
</ul>
<p>With respect to parallel creation, if the user is single threaded this is fine, but in a more complex situation
where you might want to be preparing multiple transactions in flight this can prove a limitation – in
the worst case where you have a single output that represents all your value, this forces you to serialise
the creation of every transaction. If transactions can be created and signed very fast that&#8217;s not a concern.
If there&#8217;s only a single user, that&#8217;s not a concern.</p>
<p>Both cases are typically true in the Bitcoin world, so users don&#8217;t suffer from this much. In the context of a
complex business with a large pool of shared funds, in which creation of transactions may be very slow due to the
need to get different humans to approve a tx using a signing device, this could quickly lead to frustrating
conflicts where someone approves a transaction and then discovers that it has become a double spend and
they must sign again. In the absolute worst case you could get a form of human livelock.</p>
<p>The tricky part about solving these problems is that the simplest way to express a payment request
(&#8220;send me $1000 to public key X&#8221;) inherently results in you receiving a single output, which then can
prove insufficiently granular to be convenient. In the Bitcoin space Mike Hearn and Gavin Andresen designed &#8220;BIP 70&#8221;
to solve this: it&#8217;s a simple binary format for requesting a payment and specifying exactly how you&#8217;d like to get paid,
including things like the shape of the transaction. It may seem that it&#8217;s an overly complex approach: could you not
just immediately respend the big output back to yourself in order to split it? And yes, you could, until you hit
scenarios like &#8220;the machine requesting the payment doesn&#8217;t have the keys needed to spend it&#8221;,
which turn out to be very common. So it&#8217;s really more effective for a recipient to be able to say to the
sender, &#8220;here&#8217;s the kind of transaction I want you to send me&#8221;.  The <a class="reference internal" href="protocol-state-machines.html"><span class="doc">protocol framework</span></a>
may provide a vehicle to make such negotiations simpler.</p>
<p>A further challenge is privacy. Whilst our goal of not sending transactions to nodes that don&#8217;t &#8220;need to know&#8221;
helps, to verify a transaction you still need to verify all its dependencies and that can result in you receiving
lots of transactions that involve random third parties. The problems start when you have received lots of separate
payments and been careful not to make them linkable to your identity, but then you need to combine them all in a
single transaction to make a payment.</p>
<p>Mike Hearn wrote about this problem and techniques to minimise it in
<a class="reference external" href="https://medium.com/&#64;octskyward/merge-avoidance-7f95a386692f">this article</a> from 2013. This article
coined the term &#8220;merge avoidance&#8221;, which has never been implemented in the Bitcoin space,
although not due to lack of practicality.</p>
<p>A piece of future work for the wallet implementation will be to implement automated &#8220;grooming&#8221; of the wallet
to &#8220;reshape&#8221; outputs to useful/standardised sizes, for example, and to send outputs of complex transactions
back to their issuers for reissuance to &#8220;sever&#8221; long privacy-breaching chains.</p>
<p>Finally, it should be noted that some of the issues described here are not really &#8220;cons&#8221; of
the UTXO model; they&#8217;re just fundamental.
If you used many different anonymous accounts to preserve some privacy and then needed to
spend the contents of them all simultaneously, you&#8217;d hit the same problem, so it&#8217;s not
something that can be trivially fixed with data model changes.</p>
</div>
</div>


           </div>
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="transaction-data-types.html" class="btn btn-neutral float-right" title="Transaction Data Types" accesskey="n">Next <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="getting-set-up.html" class="btn btn-neutral" title="Getting set up" accesskey="p"><span class="fa fa-arrow-circle-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016, Distributed Ledger Group, LLC.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  

    <script type="text/javascript">
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'./',
            VERSION:'latest',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
            HAS_SOURCE:  true
        };
    </script>
      <script type="text/javascript" src="_static/jquery.js"></script>
      <script type="text/javascript" src="_static/underscore.js"></script>
      <script type="text/javascript" src="_static/doctools.js"></script>

  
    <script type="text/javascript" src="_static/js/theme.js"></script>
  

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.StickyNav.enable();
      });
  </script>
   

</body>
</html>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
 								<!DOCTYPE html>
 								<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
 								<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
 								<head>
 								  <meta charset="utf-8">
 								  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								  <title>Data model &mdash; R3 Corda latest documentation</title>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
 								    <link rel="stylesheet" href="_static/css/custom.css" type="text/css" />
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								    <link rel="top" title="R3 Corda latest documentation" href="index.html"/>
-												Rebuild documentation

											
										
										
											2016-06-13 17:27:53 +00:00
+								        <link rel="next" title="Transaction Data Types" href="transaction-data-types.html"/>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								        <link rel="prev" title="Getting set up" href="getting-set-up.html"/>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
 								  <script src="_static/js/modernizr.min.js"></script>
 								</head>
 								<body class="wy-body-for-nav" role="document">
 								  <div class="wy-grid-for-nav">
 								    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
 								      <div class="wy-side-scroll">
 								        <div class="wy-side-nav-search">
-												Regen docsite and include Dokka API docs for the first time

											
										
										
											2016-03-08 17:30:51 +00:00
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								            <a href="index.html" class="icon icon-home"> R3 Corda
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
 								          </a>
 								              <div class="version">
-												Regen docsite

											
										
										
											2016-02-25 12:29:28 +00:00
+								                latest
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								              </div>
 								<div role="search">
 								  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
 								    <input type="text" name="q" placeholder="Search docs" />
 								    <input type="hidden" name="check_keywords" value="yes" />
 								    <input type="hidden" name="area" value="default" />
 								  </form>
 								</div>
-												Regen docsite and include Dokka API docs for the first time

											
										
										
											2016-03-08 17:30:51 +00:00
+								<br>
 								<a href="api/index.html">API reference</a>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								        </div>
 								        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								                <p class="caption"><span class="caption-text">Overview</span></p>
 								<ul class="current">
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="inthebox.html">What&#8217;s included?</a></li>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="getting-set-up.html">Getting set up</a></li>
-												Documentation regen

											
										
										
											2016-04-13 09:29:54 +00:00
+								<li class="toctree-l1 current"><a class="current reference internal" href="#">Data model</a><ul>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<li class="toctree-l2"><a class="reference internal" href="#overview">Overview</a></li>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								<li class="toctree-l2"><a class="reference internal" href="#comparison-with-bitcoin">Comparison with Bitcoin</a></li>
 								<li class="toctree-l2"><a class="reference internal" href="#comparison-with-ethereum">Comparison with Ethereum</a></li>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<li class="toctree-l2"><a class="reference internal" href="#rationale-for-and-tradeoffs-in-adopting-a-utxo-style-model">Rationale for and tradeoffs in adopting a UTXO-style model</a></li>
 								<li class="toctree-l2"><a class="reference internal" href="#rationale">Rationale</a></li>
 								<li class="toctree-l2"><a class="reference internal" href="#pros">Pros</a></li>
 								<li class="toctree-l2"><a class="reference internal" href="#cons">Cons</a></li>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								</ul>
 								</li>
-												Rebuild documentation

											
										
										
											2016-06-13 17:27:53 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="transaction-data-types.html">Transaction Data Types</a></li>
-												Docs: regen docsite

											
										
										
											2016-05-24 11:27:23 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="consensus.html">Consensus Model</a></li>
-												Docs: regen all docs

											
										
										
											2015-12-15 13:27:06 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="messaging.html">Networking and messaging</a></li>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="running-the-demos.html">Running the demos</a></li>
-												Regen docsite

											
										
										
											2016-03-04 15:15:51 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="node-administration.html">Node administration</a></li>
-												Regen docsite and link IRS page into it (was previously invisible)

											
										
										
											2016-04-18 17:25:41 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="irs.html">The Interest Rate Swap Contract</a></li>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								</ul>
 								<p class="caption"><span class="caption-text">Tutorials</span></p>
 								<ul>
-												Docs: regen docsite

											
										
										
											2016-05-24 11:27:23 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="where-to-start.html">Where to start</a></li>
 								<li class="toctree-l1"><a class="reference internal" href="tutorial-contract.html">Writing a contract</a></li>
-												Docs: regen all docs

											
										
										
											2015-12-15 13:27:06 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="protocol-state-machines.html">Protocol state machines</a></li>
-												Regen docs

											
										
										
											2016-03-15 16:13:01 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="oracles.html">Writing oracle services</a></li>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								</ul>
 								<p class="caption"><span class="caption-text">Appendix</span></p>
 								<ul>
-												Docs: regen docsite

											
										
										
											2016-05-24 11:27:23 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="release-process.html">Release process</a></li>
-												Rebuild documentation

											
										
										
											2016-06-13 17:27:53 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="release-process.html#steps-to-cut-a-release">Steps to cut a release</a></li>
-												Docs: regen docsite

											
										
										
											2016-05-24 11:27:23 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="release-notes.html">Release notes</a></li>
-												Docs: regen all docs

											
										
										
											2015-12-15 13:27:06 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="visualiser.html">Using the visualiser</a></li>
-												Regen docsite.

											
										
										
											2016-01-27 15:01:52 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="codestyle.html">Code style guide</a></li>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<li class="toctree-l1"><a class="reference internal" href="building-the-docs.html">Building the documentation</a></li>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								</ul>
 								        </div>
 								      </div>
 								    </nav>
 								    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
 								      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
 								        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								        <a href="index.html">R3 Corda</a>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								      </nav>
 								      <div class="wy-nav-content">
 								        <div class="rst-content">
-												Documentation regen

											
										
										
											2016-04-13 09:29:54 +00:00
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
 								<div role="navigation" aria-label="breadcrumbs navigation">
 								  <ul class="wy-breadcrumbs">
 								    <li><a href="index.html">Docs</a> &raquo;</li>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								    <li>Data model</li>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								      <li class="wy-breadcrumbs-aside">
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								            <a href="_sources/data-model.txt" rel="nofollow"> View page source</a>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
 								      </li>
 								  </ul>
 								  <hr/>
 								</div>
 								          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
 								           <div itemprop="articleBody">
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								  <div class="section" id="data-model">
 								<h1>Data model<a class="headerlink" href="#data-model" title="Permalink to this headline">¶</a></h1>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<p>This article covers the data model: how <em>states</em>, <em>transactions</em> and <em>code contracts</em> interact with each other and
 								how they are represented in the code. It doesn&#8217;t attempt to give detailed design rationales or information on future
 								design elements: please refer to the R3 wiki for background information.</p>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<div class="section" id="overview">
 								<h2>Overview<a class="headerlink" href="#overview" title="Permalink to this headline">¶</a></h2>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<p>We begin with the idea of a global ledger. In our model, although the ledger is shared, it is not always the case that
 								transactions and ledger entries are globally visible. In cases where a set of transactions stays within a small subgroup of
 								users it should be possible to keep the relevant data purely within that group.</p>
 								<p>To ensure consistency in a global, shared system where not all data may be visible to all participants, we rely
 								heavily on secure hashes like SHA-256 to identify things. The ledger is defined as a set of immutable <strong>states</strong>, which
 								are created and destroyed by digitally signed <strong>transactions</strong>. Each transaction points to a set of states that it will
 								consume/destroy, these are called <strong>inputs</strong>, and contains a set of new states that it will create, these are called
 								<strong>outputs</strong>.</p>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								<p>States contain arbitrary data, but they always contain at minimum a hash of the bytecode of a
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<strong>contract code</strong> file, which is a program expressed in JVM byte code that runs sandboxed inside a Java virtual machine.
 								Contract code (or just &#8220;contracts&#8221; in the rest of this document) are globally shared pieces of business logic.</p>
 								<p>Contracts define a <strong>verify function</strong>, which is a pure function given the entire transaction as input. To be considered
 								valid, the transaction must be <strong>accepted</strong> by the verify function of every contract pointed to by the
 								input and output states.</p>
 								<p>Beyond inputs and outputs, transactions may also contain <strong>commands</strong>, small data packets that
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								the platform does not interpret itself, but which can parameterise execution of the contracts. They can be thought of as
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								arguments to the verify function. Each command has a list of <strong>public keys</strong> associated with it. The platform ensures
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								that the transaction is signed by every key listed in the commands before the contracts start to execute. Thus, a verify
 								function can trust that all listed keys have signed the transaction but is responsible for verifying that any keys required
 								for the transaction to be valid from the verify function&#8217;s perspective are included in the list. Public keys
 								may be random/identityless for privacy, or linked to a well known legal identity, for example via a
 								<em>public key infrastructure</em> (PKI).</p>
-												Regen docsite

											
										
										
											2016-03-04 15:15:51 +00:00
+								<p>Commands are always embedded inside a transaction. Sometimes, there&#8217;s a larger piece of data that can be reused across
 								many different transactions. For this use case, we have <strong>attachments</strong>. Every transaction can refer to zero or more
 								attachments by hash. Attachments are always ZIP/JAR files, which may contain arbitrary content. Contract code can then
 								access the attachments by opening them as a JarInputStream (this is temporary and will change later).</p>
 								<p>Note that there is nothing that explicitly binds together specific inputs, outputs, commands or attachments. Instead
 								it&#8217;s up to the contract code to interpret the pieces inside the transaction and ensure they fit together correctly. This
 								is done to maximise flexibility for the contract developer.</p>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								<p>Transactions may sometimes need to provide a contract with data from the outside world. Examples may include stock
 								prices, facts about events or the statuses of legal entities (e.g. bankruptcy), and so on. The providers of such
 								facts are called <strong>oracles</strong> and they provide facts to the ledger by signing transactions that contain commands they
-												Regen docsite

											
										
										
											2016-03-04 15:15:51 +00:00
+								recognise, or by creating signed attachments. The commands contain the fact and the signature shows agreement to that fact.
 								Time is also modelled as a fact, with the signature of a special kind of oracle called a <strong>timestamping authority</strong> (TSA).
 								A TSA signs a transaction if a pre-defined timestamping command in it defines an after/before time window that includes
 								&#8220;true time&#8221; (i.e. GPS time as calibrated to the US Naval Observatory). An oracle may prefer to generate a signed
 								attachment if the fact it&#8217;s creating is relatively static and may be referred to over and over again.</p>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<p>As the same terminology often crops up in different distributed ledger designs, let&#8217;s compare this to other
-												Docs: regenerate the HTML

											
										
										
											2015-11-25 17:49:58 +00:00
+								distributed ledger systems you may be familiar with. You can find more detailed design rationales for why the platform
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								differs from existing systems in <a class="reference external" href="https://r3-cev.atlassian.net/wiki/display/AWG/Platform+Stream%3A+Corda">the R3 wiki</a>,
 								but to summarise, the driving factors are:</p>
-												Docs: regenerate the HTML

											
										
										
											2015-11-25 17:49:58 +00:00
+								<ul class="simple">
 								<li>Improved contract flexibility vs Bitcoin</li>
 								<li>Improved scalability vs Ethereum, as well as ability to keep parts of the transaction graph private (yet still uniquely addressable)</li>
 								<li>No reliance on proof of work</li>
-												Documentation regen

											
										
										
											2016-04-13 09:29:54 +00:00
+								<li>Re-use of existing sandboxing virtual machines</li>
-												Docs: regenerate the HTML

											
										
										
											2015-11-25 17:49:58 +00:00
+								<li>Use of type-safe, garbage-collected implementation languages.</li>
 								<li>Simplified auditing</li>
 								</ul>
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								</div>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<div class="section" id="comparison-with-bitcoin">
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								<h2>Comparison with Bitcoin<a class="headerlink" href="#comparison-with-bitcoin" title="Permalink to this headline">¶</a></h2>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<p>Similarities:</p>
 								<ul class="simple">
 								<li>The basic notion of immutable states that are consumed and created by transactions is the same.</li>
 								<li>The notion of transactions having multiple inputs and outputs is the same. Bitcoin sometimes refers to the ledger
 								as the unspent transaction output set (UTXO set) as a result.</li>
 								<li>Like in Bitcoin, a contract is a pure function. Contracts do not have storage or the ability to interact with anything.
 								Given the same transaction, a contract&#8217;s verify function always yields exactly the same result.</li>
 								<li>Bitcoin output scripts are parameterised by the input scripts in the spending transaction. This is somewhat similar
 								to our notion of a <em>command</em>.</li>
 								<li>Bitcoin transactions, like ours, refer to the states they consume by using a (txhash, index) pair. The Bitcoin
 								protocol calls these &#8220;outpoints&#8221;. In our prototype code they are known as <code class="docutils literal"><span class="pre">StateRefs</span></code> but the concept is identical.</li>
 								<li>Bitcoin transactions have an associated timestamp (the time at which they are mined).</li>
 								</ul>
 								<p>Differences:</p>
 								<ul class="simple">
 								<li>A Bitcoin transaction has a single, rigid data format. A &#8220;state&#8221; in Bitcoin is always a (quantity of bitcoin, script)
 								pair and cannot hold any other data. Some people have been known to try and hack around this limitation by embedding
 								data in semi-standardised places in the contract code so the data can be extracted through pattern matching, but this
 								is a poor approach. Our states can include arbitrary typed data.</li>
 								<li>A Bitcoin transaction&#8217;s acceptance is controlled only by the contract code in the consumed input states. In practice
 								this has proved limiting. Our transactions invoke not only input contracts but also the contracts of the outputs.</li>
 								<li>A Bitcoin script can only be given a fixed set of byte arrays as the input. This means there&#8217;s no way for a contract
 								to examine the structure of the entire transaction, which severely limits what contracts can do.</li>
 								<li>Our contracts are Turing-complete and can be written in any ordinary programming language that targets the JVM.</li>
-												Docs: regenerate the HTML

											
										
										
											2015-11-25 17:49:58 +00:00
+								<li>Our transactions and contracts have to get their time from an attached timestamp rather than a block chain. This is
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								important given that we are currently considering block-free conflict resolution algorithms.</li>
 								<li>We use the term &#8220;contract&#8221; to refer to a bundle of business logic that may handle various different tasks, beyond
 								transaction verification. For instance, currently our contracts also include code for creating valid transactions
 								(this is often called &#8220;wallet code&#8221; in Bitcoin).</li>
 								</ul>
 								</div>
 								<div class="section" id="comparison-with-ethereum">
-												Docs: regen HTML + reorganise left hand menu a bit.

											
										
										
											2015-12-22 15:15:38 +00:00
+								<h2>Comparison with Ethereum<a class="headerlink" href="#comparison-with-ethereum" title="Permalink to this headline">¶</a></h2>
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								<p>Similarities:</p>
 								<ul class="simple">
 								<li>Like Ethereum, code runs inside a relatively powerful virtual machine and can contain complex logic. Non-assembly
 								based programming languages can be used for contract programming.</li>
 								<li>They are both intended for the modelling of many different kinds of financial contract.</li>
 								</ul>
 								<p>Differences:</p>
 								<ul class="simple">
 								<li>The term &#8220;contract&#8221; in Ethereum refers to an <em>instantiation</em> of a program that is replicated and maintained by
 								every participating node. This instantiation is very much like an object in an OO program: it can receive and send
-												Docs: regenerate the HTML

											
										
										
											2015-11-25 17:49:58 +00:00
+								messages, update local storage and so on. In contrast, we use the term &#8220;contract&#8221; to refer to a set of functions, only
-												Documentation HTML build

											
										
										
											2015-11-25 13:29:51 +00:00
+								one of which is a part of keeping the system synchronised (the verify function). That function is pure and
 								stateless i.e. it may not interact with any other part of the system whilst executing.</li>
 								<li>There is no notion of an &#8220;account&#8221;, as there is in Ethereum.</li>
 								<li>As contracts don&#8217;t have any kind of mutable storage, there is no notion of a &#8220;message&#8221; as in Ethereum.</li>
 								<li>Ethereum claims to be a platform not only for financial logic, but literally any kind of application at all. Our
 								platform considers non-financial applications to be out of scope.</li>
 								</ul>
 								</div>
-												Regen docsite

											
										
										
											2016-05-09 16:13:23 +00:00
+								<div class="section" id="rationale-for-and-tradeoffs-in-adopting-a-utxo-style-model">
 								<h2>Rationale for and tradeoffs in adopting a UTXO-style model<a class="headerlink" href="#rationale-for-and-tradeoffs-in-adopting-a-utxo-style-model" title="Permalink to this headline">¶</a></h2>
 								<p>As discussed above, Corda uses the so-called &#8220;UTXO set&#8221; model (unspent transaction output). In this model, the database
 								does not track accounts or balances. Instead all database entries are immutable. An entry is either spent or not spent
 								but it cannot be changed. In Bitcoin, spentness is implemented simply as deletion – the inputs of an accepted transaction
 								are deleted and the outputs created.</p>
 								<p>This approach has some advantages and some disadvantages, which is why some platforms like Ethereum have tried
 								(or are trying) to abstract this choice away and support a more traditional account-like model.  We have explicitly
 								chosen <em>not</em> to do this and our decision to adopt a UTXO-style model is a deliberate one.  In the section below,
 								the rationale for this decision and the pros and cons of this choice are outlined.</p>
 								</div>
 								<div class="section" id="rationale">
 								<h2>Rationale<a class="headerlink" href="#rationale" title="Permalink to this headline">¶</a></h2>
 								<p>Corda, in common with other blockchain-like platforms, is designed to bring parties to shared sets of data into
 								consensus as to the existence, content and allowable evolutions of those data sets. However, Corda is designed with the
 								explicit aim of avoiding, to the extent possible, the scalability and privacy implications that arise from those platforms&#8217;
 								decisions to adopt a global broadcast model.</p>
 								<p>Whilst the privacy implications of a global consensus model are easy to understand, the scalability implications are
 								perhaps more subtle, yet serious. In a consensus system, it is critical that all processors of a transaction reach
 								precisely the same conclusion as to its effects.  In situations where two transactions may act on the same data set,
 								it means that the two transactions must be processed in the same <em>order</em> by all nodes. If this were not the case then it
 								would be possible to devise situations where nodes processed transactions in different orders and reached different
 								conclusions as to the state of the system.  It is for this reason that systems like Ethereum effectively run
 								single-threaded, meaning the speed of the system is limited by the single-threaded performance of the slowest
 								machine on the network.</p>
 								<p>In Corda, we assume the data being processed represents financial agreements between identifiable parties and that these
 								institutions will adopt the system only if a significant number of such agreements can be managed by the platform.
 								As such, the system has to be able to support parallelisation of execution to the greatest extent possible,
 								whilst ensuring correct transaction ordering when two transactions seek to act on the same piece of shared state.</p>
 								<p>To achieve this, we must minimise the number of parties who need to receive and process copies of any given
 								transaction and we must minimise the extent to which two transactions seek to mutate (or supersede) any given piece
 								of shared state.</p>
 								<p>A key design decision, therefore, is what should be the most atomic unit of shared data in the system.  This decision
 								also has profound privacy implications: the more coarsely defined the shared data units, the larger the set of
 								actors who will likely have a stake in its accuracy and who must process and observe any update to it.</p>
 								<p>This becomes most obvious when we consider two models for representing cash balances and payments.</p>
 								<p>A simple account model for cash would define a data structure that maintained a balance at a particular bank for each
 								&#8220;account holder&#8221;. Every holder of a balance would need a copy of this structure and would thus need to process and
 								validate every payment transaction, learning about everybody else&#8217;s payments and balances in the process.
 								All payments across that set of accounts would have to be single-threaded across the platform, limiting maximum
 								throughput.</p>
 								<p>A more sophisticated example might create a data structure per account holder.
 								But, even here, I would leak my account balance to anybody to whom I ever made
 								a payment and I could only ever make one payment at a time, for the same reasons as above.</p>
 								<p>A UTXO model would define a data structure that represented an <em>instance</em> of a claim against the bank. An account
 								holder could hold <em>many</em> such instances, the aggregate of which would reveal their balance at that institution.  However,
 								the account holder now only needs to reveal to their payee those instances consumed in making a payment to that payee.
 								This also means the payer could make several payments in parallel.   A downside is that the model is harder to understand.
 								However, we consider the privacy and scalability advantages to overwhelm the modest additional cognitive load this places
 								on those attempting to learn the system.</p>
 								<p>In what follows, further advantages and disadvantages of this design decision are explored.</p>
 								</div>
 								<div class="section" id="pros">
 								<h2>Pros<a class="headerlink" href="#pros" title="Permalink to this headline">¶</a></h2>
 								<p>The UTXO model has these advantages:</p>
 								<ul class="simple">
 								<li>Immutable ledger entries give the usual advantages that a more functional approach brings: it&#8217;s easy to do analysis
 								on a static snapshot of the data and reason about the contents.</li>
 								<li>Because there are no accounts, it&#8217;s very easy to apply transactions in parallel even for high traffic legal entities
 								assuming sufficiently granular entries.</li>
 								<li>Transaction ordering becomes trivial: it is impossible to mis-order transactions due to the reliance on hash functions
 								to identify previous states. There is no need for sequence numbers or other things that are hard to provide in a
 								fully distributed system.</li>
 								<li>Conflict resolution boils down to the double spending problem, which places extremely minimal demands on consensus
 								algorithms (as the variable you&#8217;re trying to reach consensus on is a set of booleans).</li>
 								</ul>
 								</div>
 								<div class="section" id="cons">
 								<h2>Cons<a class="headerlink" href="#cons" title="Permalink to this headline">¶</a></h2>
 								<p>It also comes with some pretty serious complexities that in practice must be abstracted from developers:</p>
 								<ul class="simple">
 								<li>Representing numeric amounts using immutable entries is unnatural. For instance, if you receive $1000 and wish
 								to send someone $100, you have to consume the $1000 output and then create two more: a $100 for the recipient and
 								$900 back to yourself as change. The fact that this happens can leak private information to an observer.</li>
 								<li>Because users do need to think in terms of balances and statements, you have to layer this on top of the
 								underlying ledger: you can&#8217;t just read someone&#8217;s balance out of the system. Hence, the &#8220;wallet&#8221; / position manager.
 								Experience from those who have developed wallets for Bitcoin and other systems is that they can be complex pieces of code,
 								although the bulk of wallets&#8217; complexity in public systems is handling the lack of finality (and key management).</li>
 								<li>Whilst transactions can be applied in parallel, it is much harder to create them in parallel due to the need to
 								strictly enforce a total ordering.</li>
 								</ul>
 								<p>With respect to parallel creation, if the user is single threaded this is fine, but in a more complex situation
 								where you might want to be preparing multiple transactions in flight this can prove a limitation – in
 								the worst case where you have a single output that represents all your value, this forces you to serialise
 								the creation of every transaction. If transactions can be created and signed very fast that&#8217;s not a concern.
 								If there&#8217;s only a single user, that&#8217;s not a concern.</p>
 								<p>Both cases are typically true in the Bitcoin world, so users don&#8217;t suffer from this much. In the context of a
 								complex business with a large pool of shared funds, in which creation of transactions may be very slow due to the
 								need to get different humans to approve a tx using a signing device, this could quickly lead to frustrating
 								conflicts where someone approves a transaction and then discovers that it has become a double spend and
 								they must sign again. In the absolute worst case you could get a form of human livelock.</p>
 								<p>The tricky part about solving these problems is that the simplest way to express a payment request
 								(&#8220;send me $1000 to public key X&#8221;) inherently results in you receiving a single output, which then can
 								prove insufficiently granular to be convenient. In the Bitcoin space Mike Hearn and Gavin Andresen designed &#8220;BIP 70&#8221;
 								to solve this: it&#8217;s a simple binary format for requesting a payment and specifying exactly how you&#8217;d like to get paid,
 								including things like the shape of the transaction. It may seem that it&#8217;s an overly complex approach: could you not
 								just immediately respend the big output back to yourself in order to split it? And yes, you could, until you hit
 								scenarios like &#8220;the machine requesting the payment doesn&#8217;t have the keys needed to spend it&#8221;,
 								which turn out to be very common. So it&#8217;s really more effective for a recipient to be able to say to the
 								sender, &#8220;here&#8217;s the kind of transaction I want you to send me&#8221;.  The <a class="reference internal" href="protocol-state-machines.html"><span class="doc">protocol framework</span></a>
 								may provide a vehicle to make such negotiations simpler.</p>
 								<p>A further challenge is privacy. Whilst our goal of not sending transactions to nodes that don&#8217;t &#8220;need to know&#8221;
 								helps, to verify a transaction you still need to verify all its dependencies and that can result in you receiving
 								lots of transactions that involve random third parties. The problems start when you have received lots of separate
 								payments and been careful not to make them linkable to your identity, but then you need to combine them all in a
 								single transaction to make a payment.</p>
 								<p>Mike Hearn wrote about this problem, and techniques to minimise it, in
 								<a class="reference external" href="https://medium.com/&#64;octskyward/merge-avoidance-7f95a386692f">this article</a> from 2013. The article
 								coined the term &#8220;merge avoidance&#8221;, which has never been implemented in the Bitcoin space,
 								although not due to lack of practicality.</p>
 								<p>A piece of future work for the wallet implementation will be to implement automated &#8220;grooming&#8221; of the wallet
 								to &#8220;reshape&#8221; outputs to useful/standardised sizes, for example, and to send outputs of complex transactions
 								back to their issuers for reissuance to &#8220;sever&#8221; long privacy-breaching chains.</p>
 								<p>Finally, it should be noted that some of the issues described here are not really &#8220;cons&#8221; of
 								the UTXO model; they&#8217;re just fundamental.
 								If you used many different anonymous accounts to preserve some privacy and then needed to
 								spend the contents of them all simultaneously, you&#8217;d hit the same problem, so it&#8217;s not
 								something that can be trivially fixed with data model changes.</p>
 								</div>
 								</div>
 								           </div>
 								          </div>
 								          <footer>
 								    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
 								        <a href="transaction-data-types.html" class="btn btn-neutral float-right" title="Transaction Data Types" accesskey="n">Next <span class="fa fa-arrow-circle-right"></span></a>
 								        <a href="getting-set-up.html" class="btn btn-neutral" title="Getting set up" accesskey="p"><span class="fa fa-arrow-circle-left"></span> Previous</a>
 								    </div>
 								  <hr/>
 								  <div role="contentinfo">
 								    <p>
 								        &copy; Copyright 2016, Distributed Ledger Group, LLC.
 								    </p>
 								  </div>
 								  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
 								</footer>
 								        </div>
 								      </div>
 								    </section>
 								  </div>
 								    <script type="text/javascript">
 								        // Page-level documentation settings exposed as a global.
 								        // NOTE(review): presumably read by the _static scripts loaded after this block — confirm.
 								        var DOCUMENTATION_OPTIONS = {
 								            URL_ROOT:'./',
 								            VERSION:'latest',
 								            COLLAPSE_INDEX:false,
 								            FILE_SUFFIX:'.html',
 								            HAS_SOURCE:  true
 								        };
 								    </script>
 								      <script type="text/javascript" src="_static/jquery.js"></script>
 								      <script type="text/javascript" src="_static/underscore.js"></script>
 								      <script type="text/javascript" src="_static/doctools.js"></script>
 								    <script type="text/javascript" src="_static/js/theme.js"></script>
 								  <script type="text/javascript">
 								      // Callback handed to jQuery; it switches on the theme's sticky navigation.
 								      jQuery(function enableStickyNav() {
 								          var stickyNav = SphinxRtdTheme.StickyNav;
 								          stickyNav.enable();
 								      });
 								  </script>
 								</body>
 								</html>