mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-19 04:57:54 +00:00
misc/storage-overhead.py: tool to estimate storage-space overhead per filesize
This commit is contained in:
parent
56dcb814a8
commit
e2e3b19a37
85
misc/storage-overhead.py
Normal file
85
misc/storage-overhead.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
#! /usr/bin/python
|
||||||
|
|
||||||
|
import sys, math
|
||||||
|
from cStringIO import StringIO
|
||||||
|
from allmydata import upload, uri, encode, storage
|
||||||
|
from allmydata.util import mathutil
|
||||||
|
|
||||||
|
def roundup(size, blocksize=4096):
|
||||||
|
return blocksize * mathutil.div_ceil(size, blocksize)
|
||||||
|
|
||||||
|
|
||||||
|
class BigFakeString:
|
||||||
|
def __init__(self, length):
|
||||||
|
self.length = length
|
||||||
|
self.fp = 0
|
||||||
|
def seek(self, offset, whence=0):
|
||||||
|
if whence == 0:
|
||||||
|
self.fp = offset
|
||||||
|
elif whence == 1:
|
||||||
|
self.fp += offset
|
||||||
|
elif whence == 2:
|
||||||
|
self.fp = self.length - offset
|
||||||
|
def tell(self):
|
||||||
|
return self.fp
|
||||||
|
|
||||||
|
def calc(filesize, params=(3,7,10), segsize=encode.Encoder.MAX_SEGMENT_SIZE):
|
||||||
|
num_shares = params[2]
|
||||||
|
if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD:
|
||||||
|
urisize = len(uri.pack_lit("A"*filesize))
|
||||||
|
sharesize = 0
|
||||||
|
sharespace = 0
|
||||||
|
else:
|
||||||
|
u = upload.FileUploader(None)
|
||||||
|
u.set_params(params)
|
||||||
|
# unfortunately, Encoder doesn't currently lend itself to answering
|
||||||
|
# this question without measuring a filesize, so we have to give it a
|
||||||
|
# fake one
|
||||||
|
data = BigFakeString(filesize)
|
||||||
|
u.set_filehandle(data)
|
||||||
|
u.set_encryption_key("a"*16)
|
||||||
|
sharesize, blocksize = u.setup_encoder()
|
||||||
|
# how much overhead?
|
||||||
|
# 0x20 bytes of offsets
|
||||||
|
# 0x04 bytes of extension length
|
||||||
|
# 0x1ad bytes of extension (=429)
|
||||||
|
# total is 465 bytes
|
||||||
|
num_segments = mathutil.div_ceil(filesize, segsize)
|
||||||
|
num_share_hashes = int(math.log(mathutil.next_power_of_k(num_shares, 2),
|
||||||
|
2)) + 1
|
||||||
|
sharesize = storage.allocated_size(sharesize, num_segments,
|
||||||
|
num_share_hashes,
|
||||||
|
429)
|
||||||
|
sharespace = num_shares * roundup(sharesize)
|
||||||
|
urisize = len(uri.pack_uri(storage_index="a"*32,
|
||||||
|
key="a"*16,
|
||||||
|
uri_extension_hash="a"*32,
|
||||||
|
needed_shares=params[0],
|
||||||
|
total_shares=params[2],
|
||||||
|
size=filesize))
|
||||||
|
|
||||||
|
return urisize, sharesize, sharespace
|
||||||
|
|
||||||
|
def main():
|
||||||
|
filesize = int(sys.argv[1])
|
||||||
|
urisize, sharesize, sharespace = calc(filesize)
|
||||||
|
print "urisize:", urisize
|
||||||
|
print "sharesize: %10d" % sharesize
|
||||||
|
print "sharespace: %10d" % sharespace
|
||||||
|
print "desired expansion: %1.1f" % (1.0 * 10 / 3)
|
||||||
|
print "effective expansion: %1.1f" % (1.0 * sharespace / filesize)
|
||||||
|
|
||||||
|
def chart():
|
||||||
|
filesize = 2
|
||||||
|
while filesize < 2**20:
|
||||||
|
urisize, sharesize, sharespace = calc(int(filesize))
|
||||||
|
expansion = 1.0 * sharespace / int(filesize)
|
||||||
|
print "%d,%d,%d,%1.2f" % (int(filesize), urisize, sharespace, expansion)
|
||||||
|
filesize = filesize * 2**0.5
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
if sys.argv[1] == "chart":
|
||||||
|
chart()
|
||||||
|
else:
|
||||||
|
main()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user