mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-19 21:17:54 +00:00
util/statistics: add tests, fix mean_repair_cost
This commit is contained in:
parent
812f23585d
commit
e1380b132b
@ -1,7 +1,8 @@
|
||||
|
||||
def foo(): pass # keep the line number constant
|
||||
|
||||
import os, time, random
|
||||
import os, time
|
||||
from StringIO import StringIO
|
||||
from twisted.trial import unittest
|
||||
from twisted.internet import defer, reactor
|
||||
from twisted.python import failure
|
||||
@ -206,6 +207,13 @@ class Statistics(unittest.TestCase):
|
||||
self.should_assert("Should assert if not 0<=p<=1", f, 1, -1)
|
||||
self.should_assert("Should assert if n < 1", f, 0, .1)
|
||||
|
||||
out = StringIO()
|
||||
statistics.print_pmf(pmf_comp, out=out)
|
||||
lines = out.getvalue().splitlines()
|
||||
self.failUnlessEqual(lines[0], "i=0: 0.81")
|
||||
self.failUnlessEqual(lines[1], "i=1: 0.18")
|
||||
self.failUnlessEqual(lines[2], "i=2: 0.01")
|
||||
|
||||
def test_survival_pmf(self):
|
||||
f = statistics.survival_pmf
|
||||
# Cross-check binomial-distribution method against convolution
|
||||
@ -217,7 +225,39 @@ class Statistics(unittest.TestCase):
|
||||
self.failUnlessTrue(statistics.valid_pmf(pmf1))
|
||||
self.should_assert("Should assert if p_i > 1", f, [1.1]);
|
||||
self.should_assert("Should assert if p_i < 0", f, [-.1]);
|
||||
|
||||
|
||||
def test_repair_count_pmf(self):
|
||||
survival_pmf = statistics.binomial_distribution_pmf(5, .9)
|
||||
repair_pmf = statistics.repair_count_pmf(survival_pmf, 3)
|
||||
# repair_pmf[0] == sum(survival_pmf[0,1,2,5])
|
||||
# repair_pmf[1] == survival_pmf[4]
|
||||
# repair_pmf[2] = survival_pmf[3]
|
||||
self.failUnlessListAlmostEqual(repair_pmf,
|
||||
[0.00001 + 0.00045 + 0.0081 + 0.59049,
|
||||
.32805,
|
||||
.0729,
|
||||
0, 0, 0])
|
||||
|
||||
def test_repair_cost(self):
|
||||
survival_pmf = statistics.binomial_distribution_pmf(5, .9)
|
||||
bwcost = statistics.bandwidth_cost_function
|
||||
cost = statistics.mean_repair_cost(bwcost, 1000,
|
||||
survival_pmf, 3, ul_dl_ratio=1.0)
|
||||
self.failUnlessAlmostEqual(cost, 558.90)
|
||||
cost = statistics.mean_repair_cost(bwcost, 1000,
|
||||
survival_pmf, 3, ul_dl_ratio=8.0)
|
||||
self.failUnlessAlmostEqual(cost, 1664.55)
|
||||
|
||||
# I haven't manually checked the math beyond here -warner
|
||||
cost = statistics.eternal_repair_cost(bwcost, 1000,
|
||||
survival_pmf, 3,
|
||||
discount_rate=0, ul_dl_ratio=1.0)
|
||||
self.failUnlessAlmostEqual(cost, 65292.056074766246)
|
||||
cost = statistics.eternal_repair_cost(bwcost, 1000,
|
||||
survival_pmf, 3,
|
||||
discount_rate=0.05,
|
||||
ul_dl_ratio=1.0)
|
||||
self.failUnlessAlmostEqual(cost, 9133.6097158191551)
|
||||
|
||||
def test_convolve(self):
|
||||
f = statistics.convolve
|
||||
@ -250,7 +290,7 @@ class Statistics(unittest.TestCase):
|
||||
def test_find_k(self):
|
||||
f = statistics.find_k
|
||||
g = statistics.pr_file_loss
|
||||
plist = [.9] * 10 + [.8] * 10
|
||||
plist = [.9] * 10 + [.8] * 10 # N=20
|
||||
t = .0001
|
||||
k = f(plist, t)
|
||||
self.failUnlessEqual(k, 10)
|
||||
|
@ -4,7 +4,7 @@
|
||||
from __future__ import division
|
||||
from mathutil import round_sigfigs
|
||||
import math
|
||||
import array
|
||||
import sys
|
||||
|
||||
def pr_file_loss(p_list, k):
|
||||
"""
|
||||
@ -87,13 +87,13 @@ def survival_pmf_via_conv(p_list):
|
||||
pmf_list = [ [1 - p, p] for p in p_list ];
|
||||
return reduce(convolve, pmf_list)
|
||||
|
||||
def print_pmf(pmf, n=4):
|
||||
def print_pmf(pmf, n=4, out=sys.stdout):
|
||||
"""
|
||||
Print a PMF in a readable form, with values rounded to n
|
||||
significant digits.
|
||||
"""
|
||||
for k, p in enumerate(pmf):
|
||||
print "i=" + str(k) + ":", round_sigfigs(p, n)
|
||||
print >>out, "i=" + str(k) + ":", round_sigfigs(p, n)
|
||||
|
||||
def pr_backup_file_loss(p_list, backup_p, k):
|
||||
"""
|
||||
@ -136,6 +136,7 @@ def find_k_from_pmf(pmf, target_loss_prob):
|
||||
if loss_prob > target_loss_prob:
|
||||
return k
|
||||
|
||||
# we shouldn't be able to get here, since sum(pmf)==1.0
|
||||
k = len(pmf) - 1
|
||||
return k
|
||||
|
||||
@ -166,23 +167,27 @@ def repair_count_pmf(survival_pmf, k):
|
||||
def bandwidth_cost_function(file_size, shares, k, ul_dl_ratio):
|
||||
return file_size + float(file_size) / k * shares * ul_dl_ratio
|
||||
|
||||
def mean_repair_cost(cost_function, file_size, survival_pmf, k):
|
||||
def mean_repair_cost(cost_function, file_size, survival_pmf, k, ul_dl_ratio):
|
||||
"""
|
||||
Return the expected cost for a repair run on a file with the given
|
||||
survival_pmf and requiring k shares.
|
||||
survival_pmf and requiring k shares, in which upload cost is
|
||||
'ul_dl_ratio' times download cost.
|
||||
"""
|
||||
repair_pmf = repair_count_pmf(survival_pmf, k)
|
||||
exp_cnt = sum([d * repair_pmf[d] for d in range(1, len(repair_pmf))])
|
||||
return cost_function(file_size, exp_cnt, k)
|
||||
expected_cost = sum([cost_function(file_size, new_shares, k, ul_dl_ratio)
|
||||
* repair_pmf[new_shares]
|
||||
for new_shares in range(1, len(repair_pmf))])
|
||||
return expected_cost
|
||||
|
||||
def eternal_repair_cost(cost_function, file_size, survival_pmf, k, discount_rate=0):
|
||||
def eternal_repair_cost(cost_function, file_size, survival_pmf, k,
|
||||
discount_rate=0, ul_dl_ratio=1.0):
|
||||
"""
|
||||
Calculate the eternal repair cost for a file that is aggressively
|
||||
repaired.
|
||||
repaired, i.e. the sum of repair costs until the file is dead.
|
||||
"""
|
||||
c = mean_repair_cost(cost_function, file_size, survival_pmf, k)
|
||||
c = mean_repair_cost(cost_function, file_size, survival_pmf, k, ul_dl_ratio)
|
||||
f = 1 - sum(survival_pmf[0:k])
|
||||
r = discount_rate
|
||||
r = float(discount_rate)
|
||||
|
||||
return (c * (1-r)) / (1 - (1-r) * f)
|
||||
|
||||
@ -259,9 +264,6 @@ def binomial_coeff(n, k):
|
||||
"""
|
||||
assert n >= k
|
||||
|
||||
if k > n:
|
||||
return 0
|
||||
|
||||
if k > n/2:
|
||||
k = n - k
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user