misc/ringsim.py: make it deterministic, more detail about grid-is-full behavior

This commit is contained in:
Brian Warner 2009-12-26 21:48:32 -05:00
parent aa102da804
commit 800fc176ec

View File

@ -3,9 +3,9 @@
# used to discuss ticket #302: "stop permuting peerlist?"
import time
import random, math
import math
from hashlib import sha1, md5, sha256
# NOTE(review): this rebinds the `sha1` name imported from hashlib above, so
# any later call to sha1() actually runs md5.
sha1 = md5
# Hash function used for node ids, storage indexes and file sizes in this
# simulator. md5 is the fastest of the three candidates timed below.
myhash = md5
# md5: 1520 "uploads" per second
# sha1: 1350 ups
# sha256: 930 ups
@ -36,14 +36,20 @@ def abbreviate_space(s, SI=True):
return r(s/(U*U*U*U), "T")
return r(s/(U*U*U*U*U), "P")
def make_up_a_file_size(max=2**31):
    """Return a random file size drawn uniformly from range(max).

    With the default `max` of 2**31 the average size is about 1GB.
    """
    # alternative: return (2 ** random.randrange(8, 31)) # avg=??
    upper_bound = max
    return random.randrange(0, upper_bound)
# NOTE(review): `sizes` is rebound further down from the seeded
# make_up_a_file_size(seed); the randomly drawn sample built here is
# discarded once that later assignment runs.
sizes = [make_up_a_file_size() for i in range(10000)]
def make_up_a_file_size(seed):
    """Derive a file size deterministically from `seed`.

    `seed` is hashed with `myhash` and the digest, read as one big integer,
    selects the size, so the same seed always yields the same size.
    """
    digest_value = int(myhash(seed).hexdigest(), 16)
    use_exponential = 1  # set to 0 to get the uniform distribution instead
    if use_exponential:
        # exponential distribution: a power of two between 2**8 and 2**30
        exponent = 8 + digest_value % (31 - 8)
        return 2 ** exponent
    # uniform distribution over range(2**31)
    ceiling = 2**31
    return digest_value % ceiling  # avg 1GB
# Sample 10000 deterministic file sizes (seeds "0" .. "9999") to estimate the
# mean file size that the capacity calculations further down rely on.
sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
avg_filesize = sum(sizes)/len(sizes)
print "average file size:", abbreviate_space(avg_filesize)
# NOTE(review): SERVER_CAPACITY is assigned twice in a row; only the second
# value (10**12) is in effect afterwards.
SERVER_CAPACITY = 10**12 * 1000
# Capacity handed to every simulated server (presumably bytes -- confirm).
SERVER_CAPACITY = 10**12
class Server:
def __init__(self, nodeid, capacity):
@ -67,14 +73,19 @@ class Server:
return "<%s %s>" % (self.__class__.__name__, self.nodeid)
class Ring:
SHOW_MINMAX = False
def __init__(self, numservers, seed, permute):
    """Build a ring of `numservers` servers.

    Server number i gets the nodeid myhash(str(seed)+str(i)).hexdigest(),
    so a given `seed` always produces the same ring. Every server is
    created with SERVER_CAPACITY, and the server list is kept sorted by
    nodeid.

    `permute` is stored as-is; servers_for_si() consults it to decide
    whether to use a per-storage-index ordering of the servers.
    """
    self.servers = []
    for i in range(numservers):
        # Hash once: the node id depends only on the ring seed and the
        # server's index.
        nodeid = myhash(str(seed)+str(i)).hexdigest()
        self.servers.append(Server(nodeid, SERVER_CAPACITY))
    self.servers.sort(key=lambda s: s.nodeid)
    self.permute = permute
    #self.list_servers()
def list_servers(self):
for i in range(len(self.servers)):
s = self.servers[i]
next_s = self.servers[(i+1)%len(self.servers)]
@ -88,12 +99,11 @@ class Ring:
print "sorted by delta"
for s in sorted(self.servers, key=lambda s:s.prev_diff):
print s, s.prev_diff
self.permute = permute
def servers_for_si(self, si):
if self.permute:
def sortkey(s):
return sha1(s.nodeid+si).digest()
return myhash(s.nodeid+si).digest()
return sorted(self.servers, key=sortkey)
for i in range(len(self.servers)):
if self.servers[i].nodeid >= si:
@ -136,9 +146,10 @@ class Ring:
#print "average_usage:", abbreviate_space(average_usagepf)
print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
100.0*sd_of_total)
s2 = sorted(self.servers, key=lambda s: s.used)
print "least:", s2[0].nodeid
print "most:", s2[-1].nodeid
if self.SHOW_MINMAX:
s2 = sorted(self.servers, key=lambda s: s.used)
print "least:", s2[0].nodeid
print "most:", s2[-1].nodeid
class Options(usage.Options):
@ -147,6 +158,7 @@ class Options(usage.Options):
("N", "N", 10, "total shares", int),
("servers", None, 100, "number of servers", int),
("seed", None, None, "seed to use for creating ring"),
("fileseed", None, "blah", "seed to use for creating files"),
("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
]
def postOptions(self):
@ -155,31 +167,56 @@ class Options(usage.Options):
def do_run(ring, opts):
    """Upload deterministic pseudo-random files into `ring` until it is full.

    File number `filenum` gets the storage index
    myhash(opts["fileseed"] + str(filenum)).hexdigest(), and its size is
    derived from that storage index, so a given fileseed always produces
    the same sequence of uploads.

    Each file needs opts["N"] shares of size filesize/opts["k"]. Shares are
    offered to the servers in the order returned by
    ring.servers_for_si(si). A server that refuses a share is skipped, and
    when the end of the list is reached placement wraps around to the
    start.

    Usage is dumped every 4000 files, after the first file for which some
    server refused a share, after the first file that wrapped around the
    server list, and when the grid fills up.

    Returns the number of files that were uploaded completely before a file
    was refused by every server in its list ("GRID IS FULL").
    """
    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
    fileseed = opts["fileseed"]
    all_servers_have_room = True
    no_files_have_wrapped = True
    for filenum in count(0):
        #used = list(reversed(sorted([s.used for s in ring.servers])))
        #used = [s.used for s in ring.servers]
        #print used
        # The storage index is derived from the file seed and also seeds
        # the file size, which keeps the whole run reproducible.
        si = myhash(fileseed+str(filenum)).hexdigest()
        filesize = make_up_a_file_size(si)
        sharesize = filesize / opts["k"]
        if filenum%4000==0 and filenum > 1:
            ring.dump_usage(filenum, avg_space_per_file)
        servers = ring.servers_for_si(si)
        #print ring.show_servers(servers[:opts["N"]])
        remaining_shares = opts["N"]
        index = 0
        server_was_full = False
        file_was_wrapped = False
        # Servers that have not yet refused a share of this file. Once it
        # is empty, nobody can take the remaining shares.
        remaining_servers = set(servers)
        while remaining_shares:
            if index >= len(servers):
                # ran off the end of the list: start over from the front
                index = 0
                file_was_wrapped = True
            s = servers[index]
            accepted = s.upload(sharesize)
            if not accepted:
                server_was_full = True
                remaining_servers.discard(s)
                if not remaining_servers:
                    print("-- GRID IS FULL")
                    ring.dump_usage(filenum, avg_space_per_file)
                    return filenum  # number of files successfully uploaded
                # try the next server with the same share
                index += 1
                continue
            remaining_shares -= 1
            index += 1
        # file is done being uploaded

        # Report each milestone only once per run.
        if server_was_full and all_servers_have_room:
            all_servers_have_room = False
            print("-- FIRST SERVER FULL")
            ring.dump_usage(filenum, avg_space_per_file)
        if file_was_wrapped and no_files_have_wrapped:
            no_files_have_wrapped = False
            print("-- FIRST FILE WRAPPED")
            ring.dump_usage(filenum, avg_space_per_file)
def do_ring(opts):
#seed = str(random.randrange(2**31))
total_capacity = opts["servers"]*SERVER_CAPACITY
avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
avg_files = total_capacity / avg_space_per_file