tahoe.cfg: add controls for k and N (and shares-of-happiness)

This commit is contained in:
Brian Warner 2008-11-18 00:29:44 -07:00
parent ead0e4d6ca
commit cae54ab118
6 changed files with 80 additions and 1 deletions

3
NEWS
View File

@ -61,6 +61,9 @@ commented out. (ticket #518)
tahoe.cfg now has controls for the foolscap "keepalive" and "disconnect"
timeouts (#521).
tahoe.cfg now has controls for the encoding parameters: "shares.needed",
"shares.total", and "shares.happy" in the "[client]" section. The default
parameters are still 3-of-10 (with shares.happy = 7).
** CLI Changes

View File

@ -232,6 +232,40 @@ stats_gatherer.furl = (FURL string, optional)
If provided, the node will connect to the given stats gatherer and provide
it with operational statistics.
shares.needed = (int, optional) aka "k"
shares.total = (int, optional) aka "N", N >= k
shares.happy = (int, optional) k <= happy <= N
These three values set the default encoding parameters. Each time a new file
is uploaded, erasure-coding is used to break the ciphertext into separate
pieces. There will be "N" (i.e. shares.total) pieces created, and the file
will be recoverable if any "k" (i.e. shares.needed) pieces are retrieved.
The default values are 3-of-10 (i.e. shares.needed = 3, shares.total = 10).
Setting k to 1 is equivalent to simple replication (uploading N copies of
the file).
These values control the tradeoff between storage overhead, performance, and
reliability. To a first approximation, a 1MB file will use (1MB*N/k) of
backend storage space (the actual value will be a bit more, because of other
forms of overhead). Up to N-k shares can be lost before the file becomes
unrecoverable, so assuming there are at least N servers, up to N-k servers
can be offline without losing the file. So large N/k ratios are more
reliable, and small N/k ratios use less disk space. Clearly, k must never be
larger than N.
Large values of N will slow down upload operations slightly, since more
servers must be involved, and will slightly increase storage overhead due to
the hash trees that are created. Large values of k will cause downloads to
be marginally slower, because more servers must be involved. N cannot be
larger than 256, because of the 8-bit erasure-coding algorithm that Tahoe
uses.
If servers are lost during an upload, shares.happy determines whether the
upload is considered successful or not. If at least "shares.happy" shares
were placed, the upload is declared a success, otherwise it is declared a
failure. The default value is 7. This value must not be smaller than k nor
larger than N.
== Storage Server Configuration ==

View File

@ -65,6 +65,7 @@ class Client(node.Node, pollmixin.PollMixin):
node.Node.__init__(self, basedir)
self.started_timestamp = time.time()
self.logSource="Client"
self.DEFAULT_ENCODING_PARAMETERS = self.DEFAULT_ENCODING_PARAMETERS.copy()
self.init_introducer_client()
self.init_stats_provider()
self.init_lease_secret()
@ -185,6 +186,10 @@ class Client(node.Node, pollmixin.PollMixin):
def init_client(self):
helper_furl = self.get_config("client", "helper.furl", None)
DEP = self.DEFAULT_ENCODING_PARAMETERS
DEP["k"] = int(self.get_config("client", "shares.needed", DEP["k"]))
DEP["n"] = int(self.get_config("client", "shares.total", DEP["n"]))
DEP["happy"] = int(self.get_config("client", "shares.happy", DEP["happy"]))
convergence_s = self.get_or_create_private_config('convergence', _make_secret)
self.convergence = base32.a2b(convergence_s)
self._node_cache = weakref.WeakValueDictionary() # uri -> node

View File

@ -104,6 +104,9 @@ def create_client(basedir, config, out=sys.stdout, err=sys.stderr):
c.write("helper.furl =\n")
c.write("#key_generator.furl =\n")
c.write("#stats_gatherer.furl =\n")
c.write("#shares.needed = 3\n")
c.write("#shares.happy = 7\n")
c.write("#shares.total = 10\n")
c.write("\n")
boolstr = {True:"true", False:"false"}

View File

@ -418,6 +418,10 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin):
if self.stats_gatherer_furl:
write("stats_gatherer.furl", self.stats_gatherer_furl)
# give subclasses a chance to append lines to the node's tahoe.cfg
# files before they are launched.
self._set_up_nodes_extra_config()
# start client[0], wait for its tub to be ready (at which point it
# will have registered the helper furl).
c = self.add_service(client.Client(basedir=basedirs[0]))
@ -452,6 +456,10 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin):
d.addCallback(_connected)
return d
def _set_up_nodes_extra_config(self):
    """Hook intended to be overridden by subclasses.

    The default implementation deliberately does nothing.
    """
    return None
def _grab_stats(self, res):
d = self.stats_gatherer.poll()
return d

View File

@ -7,11 +7,12 @@ from twisted.python import log
from twisted.internet import defer
from foolscap import eventual
from allmydata import uri
from allmydata import uri, monitor
from allmydata.immutable import upload
from allmydata.interfaces import IFileURI, FileTooLargeError, NotEnoughSharesError
from allmydata.util.assertutil import precondition
from allmydata.util.deferredutil import DeferredListShouldSucceed
from common import SystemTestMixin
from common_util import ShouldFailMixin
MiB = 1024*1024
@ -542,6 +543,31 @@ class StorageIndex(unittest.TestCase):
d.addCallback(_done)
return d
class EncodingParameters(SystemTestMixin, unittest.TestCase):
    """System test: encoding parameters configured in tahoe.cfg are honored.

    client0's tahoe.cfg is written with non-default "shares.needed" and
    "shares.total" values (see _set_up_nodes_extra_config). The test then
    uploads a small file through client0 and verifies that the results
    returned by the file node's check() report those same values.
    """

    # The values written into tahoe.cfg and the values asserted by the test
    # come from the same constants so the two cannot drift apart.
    SHARES_NEEDED = 7
    SHARES_TOTAL = 12

    def test_configure_parameters(self):
        """Upload via client0 and check the reported k and N.

        Returns the Deferred chain: node setup -> upload -> create a node
        from the resulting URI -> check() -> assertions on the check data.
        """
        self.basedir = self.mktemp()
        DATA = "data" * 100
        u = upload.Data(DATA, convergence="")
        d = self.set_up_nodes()
        d.addCallback(lambda res: self.clients[0].upload(u))
        d.addCallback(lambda ur: self.clients[0].create_node_from_uri(ur.uri))
        m = monitor.Monitor()
        d.addCallback(lambda fn: fn.check(m))
        def _check(cr):
            data = cr.get_data()
            # must match what _set_up_nodes_extra_config wrote for client0
            self.failUnlessEqual(data["count-shares-needed"],
                                 self.SHARES_NEEDED)
            self.failUnlessEqual(data["count-shares-expected"],
                                 self.SHARES_TOTAL)
        d.addCallback(_check)
        return d

    def _set_up_nodes_extra_config(self):
        """Write a [client] section with custom k/N into client0's tahoe.cfg.

        Overrides the hook of the same name defined by SystemTestMixin.
        """
        cfg_path = os.path.join(self.getdir("client0"), "tahoe.cfg")
        # NOTE(review): mode "wt" truncates, so any tahoe.cfg already present
        # for client0 is replaced rather than appended to -- confirm that no
        # earlier setup step writes settings into this file.
        f = open(cfg_path, "wt")
        try:
            f.write("\n")
            f.write("[client]\n")
            f.write("shares.needed = %d\n" % self.SHARES_NEEDED)
            f.write("shares.total = %d\n" % self.SHARES_TOTAL)
            f.write("\n")
        finally:
            # always release the handle, even if a write fails
            f.close()
# TODO:
# upload with exactly 75 peers (shares_of_happiness)