2009-12-26 01:03:39 -05:00
|
|
|
#! /usr/bin/python
|
|
|
|
|
|
|
|
# used to discuss ticket #302: "stop permuting peerlist?"
|
|
|
|
|
2011-10-07 03:30:31 +00:00
|
|
|
# import time
|
2019-03-22 11:40:58 +01:00
|
|
|
|
|
|
|
from __future__ import print_function
|
|
|
|
|
2009-12-26 21:48:32 -05:00
|
|
|
import math
|
2011-10-07 03:30:31 +00:00
|
|
|
from hashlib import md5 # sha1, sha256
|
2009-12-26 21:48:32 -05:00
|
|
|
myhash = md5
|
2009-12-26 01:03:39 -05:00
|
|
|
# md5: 1520 "uploads" per second
|
|
|
|
# sha1: 1350 ups
|
|
|
|
# sha256: 930 ups
|
|
|
|
from itertools import count
|
|
|
|
from twisted.python import usage
|
|
|
|
|
|
|
|
def abbreviate_space(s, SI=True):
    """Format a byte count as a short human-readable string.

    s: the quantity to format, or None.
    SI: when true use powers of 1000 with a "B" suffix (kB, MB, ...);
        otherwise use powers of 1024 with an "iB" suffix (kiB, MiB, ...).

    Returns "unknown" for None, "<n> B" for anything below 1024, and
    otherwise a two-decimal value with a k/M/G/T/P prefix.
    """
    if s is None:
        return "unknown"

    if SI:
        base, unit = 1000.0, "B"
    else:
        base, unit = 1024.0, "iB"

    # 1000-1023 get emitted as bytes, even in SI mode
    if s < 1024:
        return "%d B" % s

    template = "%.2f %s%s"
    divisor = base
    for prefix in ("k", "M", "G", "T"):
        # a value belongs to this prefix while it is below the next power
        if s < divisor * base:
            return template % (s / divisor, prefix, unit)
        divisor *= base
    # everything larger is reported in peta-units
    return template % (s / divisor, "P", unit)
|
|
|
|
|
2009-12-26 21:48:32 -05:00
|
|
|
def make_up_a_file_size(seed):
    """Return a deterministic pseudo-random file size derived from *seed*.

    seed: text or bytes; text is UTF-8 encoded before hashing, because
        hashlib on Python 3 rejects str input (on Python 2, str is already
        bytes and is passed through unchanged).

    Returns a power of two between 2**8 and 2**30 inclusive. The exponent is
    chosen uniformly from the hash, which gives an exponential distribution
    of sizes.
    """
    if not isinstance(seed, bytes):
        seed = seed.encode("utf-8")
    h = int(myhash(seed).hexdigest(), 16)
    # exponential distribution: h % 23 is in 0..22, so e is in 8..30
    e = 8 + (h % (31-8))
    # NOTE: a uniform alternative was previously sketched here as
    # "h % 2**31" (avg 1GB); it is not in use.
    return 2 ** e
|
2009-12-26 21:48:32 -05:00
|
|
|
|
|
|
|
# Estimate the average file size by sampling 10000 deterministic synthetic
# files (seeded with "0".."9999"); do_run/do_ring use avg_filesize to compute
# the expected per-file space.
sizes = list(map(make_up_a_file_size, map(str, range(10000))))
avg_filesize = sum(sizes) / len(sizes)
print("average file size:", abbreviate_space(avg_filesize))

# Capacity given to every simulated server, in the same units as share sizes.
SERVER_CAPACITY = 10**12
|
2009-12-26 01:03:39 -05:00
|
|
|
|
2019-05-15 08:17:44 +02:00
|
|
|
class Server(object):
    """A simulated storage server that accepts shares until it is nearly full.

    Attributes:
      nodeid: identifier supplied by the creator (Ring uses a hex digest).
      capacity: upper bound on `used`; usage always stays strictly below it.
      used: total size of all accepted shares.
      numshares: number of accepted shares.
      full_at_tick: None unless set externally; only read by __repr__.
    """

    def __init__(self, nodeid, capacity):
        self.nodeid = nodeid
        self.capacity = capacity
        self.used = 0
        self.numshares = 0
        self.full_at_tick = None

    def upload(self, sharesize):
        """Try to store one share of `sharesize`.

        Returns True and records the share when the new total stays strictly
        below the capacity; otherwise returns False and changes nothing.
        """
        new_total = self.used + sharesize
        if not new_total < self.capacity:
            return False
        self.used = new_total
        self.numshares += 1
        return True

    def __repr__(self):
        cls_name = self.__class__.__name__
        if self.full_at_tick is None:
            return "<%s %s>" % (cls_name, self.nodeid)
        return "<%s %s full at %d>" % (cls_name, self.nodeid, self.full_at_tick)
|
|
|
|
|
2019-05-15 08:17:44 +02:00
|
|
|
class Ring(object):
    """A ring of simulated servers, ordered by node id.

    Node ids are hex digests of `myhash`; servers are kept sorted by node id.
    Depending on `permute`, the servers for a storage index are either a
    per-file permutation of the whole list or the ring rotated so that it
    starts at the first node id >= the storage index.
    """

    # when true, dump_usage() also prints the least/most used node ids
    SHOW_MINMAX = False

    def __init__(self, numservers, seed, permute):
        """Create `numservers` servers whose node ids derive from `seed`.

        permute: truthy to use the permuted server selection in
            servers_for_si(), falsy to use the flat (linear) ring.
        """
        self.servers = []
        for i in range(numservers):
            nodeid = myhash(self._to_bytes(str(seed)+str(i))).hexdigest()
            self.servers.append(Server(nodeid, SERVER_CAPACITY))
        self.servers.sort(key=lambda s: s.nodeid)
        self.permute = permute
        #self.list_servers()

    @staticmethod
    def _to_bytes(text):
        """Return `text` as bytes; hashlib rejects str input on Python 3."""
        if isinstance(text, bytes):
            return text
        return text.encode("utf-8")

    def list_servers(self):
        """Print every server with the ring distance from its predecessor.

        Also stores the distances on each server as `next_diff` and
        `prev_diff` (zero-padded hex strings), then prints the servers again
        sorted by `prev_diff`.
        """
        servers = self.servers
        total = len(servers)
        for i, s in enumerate(servers):
            width = len(s.nodeid)
            # distances are taken modulo the id space so that the wrap-around
            # pair (last server -> first server) yields a positive delta
            modulus = 16 ** width
            here = int(s.nodeid, 16)
            next_s = servers[(i+1) % total]
            prev_s = servers[(i-1) % total]
            s.next_diff = "%0*x" % (width,
                                    (int(next_s.nodeid, 16) - here) % modulus)
            s.prev_diff = "%0*x" % (width,
                                    (here - int(prev_s.nodeid, 16)) % modulus)
            print(s, s.prev_diff)

        print("sorted by delta")
        for s in sorted(servers, key=lambda s: s.prev_diff):
            print(s, s.prev_diff)

    def servers_for_si(self, si):
        """Return the servers in the order they should be tried for `si`.

        si: storage index as a hex string comparable with the node ids.
        Returns a new list containing every server exactly once.
        """
        if self.permute:
            def sortkey(s):
                return myhash(self._to_bytes(s.nodeid+si)).digest()
            return sorted(self.servers, key=sortkey)
        # flat ring: start at the first server whose id is >= si and wrap
        for i, s in enumerate(self.servers):
            if s.nodeid >= si:
                return self.servers[i:] + self.servers[:i]
        # si is past the last node id: wrap to the start of the ring
        return list(self.servers)

    def show_servers(self, picked):
        """Return a string of "1"/"0" per server (in ring order), "1" when
        the server is in `picked`."""
        chosen = set(picked)  # build once instead of scanning per server
        return "".join(["1" if s in chosen else "0" for s in self.servers])

    def dump_usage(self, numfiles, avg_space_per_file):
        """Print usage statistics after `numfiles` uploads.

        avg_space_per_file measures expected grid-wide ciphertext per file.
        Prints min/max/expected per-server-per-file usage, the spread between
        the fullest and emptiest server, and the sample standard deviation.
        When `numfiles` is 0 or there are no servers, only the "uploaded"
        line is printed because per-file figures are undefined.
        """
        print("uploaded", numfiles)
        if not numfiles or not self.servers:
            return
        # used is actual per-server ciphertext, largest first
        used = sorted((s.used for s in self.servers), reverse=True)
        # usedpf is actual per-server-per-file ciphertext
        usedpf = [1.0*u/numfiles for u in used]
        #print("min/max usage: %s/%s" % (abbreviate_space(used[-1]),
        #                                abbreviate_space(used[0])))
        # avg_usage_per_file is expected per-server-per-file ciphertext
        avg_usage_per_file = avg_space_per_file/len(self.servers)
        spreadpf = usedpf[0] - usedpf[-1]
        average_usagepf = sum(usedpf) / len(usedpf)
        if len(usedpf) > 1:
            variance = (sum((u-average_usagepf)**2 for u in usedpf)
                        / (len(usedpf)-1))
        else:
            # sample variance is undefined for a single server
            variance = 0.0
        std_deviation = math.sqrt(variance)
        sd_of_total = std_deviation / avg_usage_per_file

        print("min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
            abbreviate_space(usedpf[-1]),
            abbreviate_space(usedpf[0]),
            abbreviate_space(avg_usage_per_file) ), end=' ')
        print("spread-pf: %s (%.2f%%)" % (
            abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file), end=' ')
        #print("average_usage:", abbreviate_space(average_usagepf))
        print("stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
                                       100.0*sd_of_total))
        if self.SHOW_MINMAX:
            s2 = sorted(self.servers, key=lambda s: s.used)
            print("least:", s2[0].nodeid)
            print("most:", s2[-1].nodeid)
|
2009-12-26 01:03:39 -05:00
|
|
|
|
|
|
|
|
|
|
|
class Options(usage.Options):
    """Command-line options for the ring simulator.

    --seed is mandatory; every other parameter has a default.
    """
    optParameters = [
        ("k", "k", 3, "required shares", int),
        ("N", "N", 10, "total shares", int),
        ("servers", None, 100, "number of servers", int),
        ("seed", None, None, "seed to use for creating ring"),
        ("fileseed", None, "blah", "seed to use for creating files"),
        ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
        ]

    def postOptions(self):
        """Validate parsed options.

        Raises usage.UsageError when --seed is missing or empty. An explicit
        raise is used rather than `assert`, which is stripped under
        `python -O` and would let a missing seed through.
        """
        if not self["seed"]:
            raise usage.UsageError("--seed is required")
|
|
|
|
|
|
|
|
|
|
|
|
def _as_hash_input(text):
    """Return `text` as bytes; hashlib rejects str input on Python 3."""
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def do_run(ring, opts):
    """Upload synthetic files into `ring` until no server accepts a share.

    ring: object providing servers_for_si(si) and
        dump_usage(numfiles, avg_space_per_file); the returned servers must
        provide upload(sharesize).
    opts: mapping with "N" (shares per file), "k" (required shares) and
        "fileseed" (seed for the per-file storage indexes).

    Prints usage every 4000 files, when the first server rejects a share,
    and when a file first wraps around the server list. Returns the number
    of completely uploaded files once the grid is full.
    """
    avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
    fileseed = opts["fileseed"]
    all_servers_have_room = True
    no_files_have_wrapped = True
    for filenum in count(0):
        #used = list(reversed(sorted([s.used for s in ring.servers])))
        #used = [s.used for s in ring.servers]
        #print(used)
        si = myhash(_as_hash_input(fileseed+str(filenum))).hexdigest()
        # hash the same bytes on Python 2 and 3
        filesize = make_up_a_file_size(_as_hash_input(si))
        sharesize = filesize / opts["k"]
        if filenum%4000==0 and filenum > 1:
            # files 0..filenum-1 are complete at this point
            ring.dump_usage(filenum, avg_space_per_file)
        servers = ring.servers_for_si(si)
        #print(ring.show_servers(servers[:opts["N"]]))
        remaining_shares = opts["N"]
        index = 0
        server_was_full = False
        file_was_wrapped = False
        # servers that have not yet rejected a share of this file
        remaining_servers = set(servers)
        while remaining_shares:
            if index >= len(servers):
                # ran off the end of the list: start over, possibly placing
                # a second share on a server that already holds one
                index = 0
                file_was_wrapped = True
            s = servers[index]
            index += 1
            if s.upload(sharesize):
                remaining_shares -= 1
                continue
            server_was_full = True
            remaining_servers.discard(s)
            if not remaining_servers:
                print("-- GRID IS FULL")
                # the current file was not completed, so exactly `filenum`
                # files have been uploaded
                ring.dump_usage(filenum, avg_space_per_file)
                return filenum

        # file is done being uploaded: filenum+1 files are now complete.
        # Reporting filenum here would under-count by one and divide by
        # zero when the very first file triggers a report.
        uploaded = filenum + 1
        if server_was_full and all_servers_have_room:
            all_servers_have_room = False
            print("-- FIRST SERVER FULL")
            ring.dump_usage(uploaded, avg_space_per_file)
        if file_was_wrapped and no_files_have_wrapped:
            no_files_have_wrapped = False
            print("-- FIRST FILE WRAPPED")
            ring.dump_usage(uploaded, avg_space_per_file)
|
2009-12-26 01:03:39 -05:00
|
|
|
|
|
|
|
|
|
|
|
def do_ring(opts):
    """Build a ring from `opts` and run the upload simulation on it.

    Prints the expected number of uploads (total capacity divided by the
    expected space per file) and the selection mode before starting.
    """
    grid_capacity = opts["servers"]*SERVER_CAPACITY
    space_per_file = avg_filesize * opts["N"] / opts["k"]
    print("expected number of uploads:", grid_capacity / space_per_file)

    mode_label = " PERMUTED" if opts["permute"] else " LINEAR"
    print(mode_label)

    simulated_ring = Ring(opts["servers"], opts["seed"], opts["permute"])
    do_run(simulated_ring, opts)
|
2009-12-26 01:03:39 -05:00
|
|
|
|
|
|
|
def run(opts):
    """Run the simulation for the parsed options; delegates to do_ring()."""
    do_ring(opts)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    opts = Options()
    try:
        opts.parseOptions()
    except usage.UsageError as e:
        # bad or missing command-line arguments: show the problem and a
        # hint rather than a traceback
        print("%s: %s" % (sys.argv[0], e))
        print("%s: Try --help for usage details." % (sys.argv[0],))
        sys.exit(1)
    run(opts)
|