tahoe-lafs/misc/ringsim.py
2009-12-26 01:03:39 -05:00

203 lines
6.8 KiB
Python
Executable File

#! /usr/bin/python
# used to discuss ticket #302: "stop permuting peerlist?"
import time
import random, math
from hashlib import sha1, md5, sha256
sha1 = md5
# md5: 1520 "uploads" per second
# sha1: 1350 ups
# sha256: 930 ups
from itertools import count
from twisted.python import usage
def abbreviate_space(s, SI=True):
if s is None:
return "unknown"
if SI:
U = 1000.0
isuffix = "B"
else:
U = 1024.0
isuffix = "iB"
def r(count, suffix):
return "%.2f %s%s" % (count, suffix, isuffix)
if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
return "%d B" % s
if s < U*U:
return r(s/U, "k")
if s < U*U*U:
return r(s/(U*U), "M")
if s < U*U*U*U:
return r(s/(U*U*U), "G")
if s < U*U*U*U*U:
return r(s/(U*U*U*U), "T")
return r(s/(U*U*U*U*U), "P")
def make_up_a_file_size(max=2**31):
#return (2 ** random.randrange(8, 31)) # avg=??
return random.randrange(max) # avg 1GB
sizes = [make_up_a_file_size() for i in range(10000)]
avg_filesize = sum(sizes)/len(sizes)
print "average file size:", abbreviate_space(avg_filesize)
SERVER_CAPACITY = 10**12 * 1000
class Server:
def __init__(self, nodeid, capacity):
self.nodeid = nodeid
self.used = 0
self.capacity = capacity
self.numshares = 0
self.full_at_tick = None
def upload(self, sharesize):
if self.used + sharesize < self.capacity:
self.used += sharesize
self.numshares += 1
return True
return False
def __repr__(self):
if self.full_at_tick is not None:
return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick)
else:
return "<%s %s>" % (self.__class__.__name__, self.nodeid)
class Ring:
def __init__(self, numservers, seed, permute):
self.servers = []
for i in range(numservers):
nodeid = sha1(str(seed)+str(i)).hexdigest()
capacity = SERVER_CAPACITY
s = Server(nodeid, capacity)
self.servers.append(s)
self.servers.sort(key=lambda s: s.nodeid)
for i in range(len(self.servers)):
s = self.servers[i]
next_s = self.servers[(i+1)%len(self.servers)]
diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16))
s.next_diff = diff
prev_s = self.servers[(i-1)%len(self.servers)]
diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16))
s.prev_diff = diff
print s, s.prev_diff
print "sorted by delta"
for s in sorted(self.servers, key=lambda s:s.prev_diff):
print s, s.prev_diff
self.permute = permute
def servers_for_si(self, si):
if self.permute:
def sortkey(s):
return sha1(s.nodeid+si).digest()
return sorted(self.servers, key=sortkey)
for i in range(len(self.servers)):
if self.servers[i].nodeid >= si:
return self.servers[i:] + self.servers[:i]
return list(self.servers)
def show_servers(self, picked):
bits = []
for s in self.servers:
if s in picked:
bits.append("1")
else:
bits.append("0")
#d = [s in picked and "1" or "0" for s in self.servers]
return "".join(bits)
def dump_usage(self, numfiles, avg_space_per_file):
print "uploaded", numfiles
# avg_space_per_file measures expected grid-wide ciphertext per file
used = list(reversed(sorted([s.used for s in self.servers])))
# used is actual per-server ciphertext
usedpf = [1.0*u/numfiles for u in used]
# usedpf is actual per-server-per-file ciphertext
#print "min/max usage: %s/%s" % (abbreviate_space(used[-1]),
# abbreviate_space(used[0]))
avg_usage_per_file = avg_space_per_file/len(self.servers)
# avg_usage_per_file is expected per-server-per-file ciphertext
spreadpf = usedpf[0] - usedpf[-1]
average_usagepf = sum(usedpf) / len(usedpf)
variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1)
std_deviation = math.sqrt(variance)
sd_of_total = std_deviation / avg_usage_per_file
print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
abbreviate_space(usedpf[-1]),
abbreviate_space(usedpf[0]),
abbreviate_space(avg_usage_per_file) ),
print "spread-pf: %s (%.2f%%)" % (
abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file),
#print "average_usage:", abbreviate_space(average_usagepf)
print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
100.0*sd_of_total)
s2 = sorted(self.servers, key=lambda s: s.used)
print "least:", s2[0].nodeid
print "most:", s2[-1].nodeid
class Options(usage.Options):
optParameters = [
("k", "k", 3, "required shares", int),
("N", "N", 10, "total shares", int),
("servers", None, 100, "number of servers", int),
("seed", None, None, "seed to use for creating ring"),
("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
]
def postOptions(self):
assert self["seed"]
def do_run(ring, opts):
avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
start = time.time()
for filenum in count(0):
#used = list(reversed(sorted([s.used for s in ring.servers])))
#used = [s.used for s in ring.servers]
#print used
filesize = make_up_a_file_size()
sharesize = filesize / opts["k"]
si = sha1(str(random.randrange(2**40))).hexdigest()
if filenum%4000==0 and filenum > 1:
ring.dump_usage(filenum, avg_space_per_file)
servers = ring.servers_for_si(si)
#print ring.show_servers(servers[:opts["N"]])
remaining_shares = opts["N"]
index = 0
while remaining_shares:
s = servers[index]
accepted = s.upload(sharesize)
if not accepted:
return filenum # number of files successfully uploaded
remaining_shares -= 1
index += 1
def do_ring(opts):
#seed = str(random.randrange(2**31))
total_capacity = opts["servers"]*SERVER_CAPACITY
avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
avg_files = total_capacity / avg_space_per_file
print "expected number of uploads:", avg_files
if opts["permute"]:
print " PERMUTED"
else:
print " LINEAR"
seed = opts["seed"]
ring = Ring(opts["servers"], seed, opts["permute"])
num_files = do_run(ring, opts)
def run(opts):
do_ring(opts)
if __name__ == "__main__":
opts = Options()
opts.parseOptions()
run(opts)