tahoe-lafs/misc/operations_helpers/munin/tahoe_estimate_files

#!/usr/bin/env python


import sys, os.path

# Munin plugin: estimate how many files/directories exist in the grid by
# counting the distinct share-directory names found in a sample of prefix
# directories on a few local storage nodes, then scaling that count up.
#
# Invoked with "config" as the first argument it prints the graph
# description; otherwise it prints a single "files.value" line.

# Edit this to point at some subset of storage directories.
node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"),
             os.path.expanduser("~amduser/prodnet/storage2"),
             os.path.expanduser("~amduser/prodnet/storage3"),
             os.path.expanduser("~amduser/prodnet/storage4"),
             ]

# Subdirectories of <node>/storage/shares/ that get sampled.
sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"]
# and edit this to reflect your default encoding's "total_shares" value, and
# the total number of servers.
N = 10
num_servers = 20


def print_config():
    """Print the munin graph configuration for this plugin."""
    print("""\
graph_title Tahoe File Estimate
graph_vlabel files
graph_category tahoe
graph_info This graph shows the estimated number of files and directories present in the grid
files.label files
files.draw LINE2""")


def count_unique_indices(storage_dirs, prefixes):
    """Return the number of distinct entry names found in the sampled dirs.

    For every directory in `storage_dirs` and every name in `prefixes`, the
    directory <storage_dir>/storage/shares/<prefix> is listed.  An entry name
    seen under several nodes is counted once.

    os.listdir() errors (e.g. a missing directory) are not caught, so a
    misconfigured path makes the plugin fail rather than report a bogus value.
    """
    index_strings = set()
    for base in storage_dirs:
        for section in prefixes:
            sampledir = os.path.join(base, "storage", "shares", section)
            index_strings.update(os.listdir(sampledir))
    return len(index_strings)


def estimate_files(unique_strings, num_sections, num_nodes,
                   total_shares, total_servers):
    """Scale a sampled count of unique entries up to a grid-wide estimate.

    unique_strings -- number of distinct entries seen in the sample
    num_sections   -- how many prefix directories were sampled per node
    num_nodes      -- how many storage nodes were sampled
    total_shares   -- the encoding's "total_shares" value (N)
    total_servers  -- total number of servers in the grid

    Returns the estimate as a float.  Raises ZeroDivisionError if
    `num_sections` or `total_servers` is zero.
    """
    # the chance that any given file appears on any given server
    chance = 1.0 * total_shares / total_servers

    # the chance that the file does *not* appear on the servers that we're
    # examining
    no_chance = (1 - chance) ** num_nodes

    # if a file has a 25% chance of not appearing in our sample, then we need
    # to raise our estimate by (1.25/1)
    # NOTE(review): this is a first-order approximation; the exact inverse
    # would be 1/(1-no_chance) (1.333 for 25%).  Kept as-is -- confirm intent.
    correction = 1 + no_chance

    # Only num_sections out of 32*32 sections are sampled (presumably 32*32 is
    # the number of possible two-character prefix directories -- confirm).
    # Force float division: under Python 2 the integer form truncated
    # 1024/10 to 102 instead of 102.4.
    scale = float(32 * 32) / num_sections

    return unique_strings * scale * correction


def main(argv):
    """Run the plugin with the given argument vector; return the exit code."""
    if len(argv) > 1 and argv[1] == "config":
        print_config()
        return 0

    unique_strings = count_unique_indices(node_dirs, sections)
    files = estimate_files(unique_strings, len(sections), len(node_dirs),
                           N, num_servers)
    print("files.value %d" % int(files))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
shebang: replace "/usr/bin/python" with "/usr/bin/env python" 2008-11-04 18:03:06 -07:00			`#!/usr/bin/env python`
munin/tahoe_estimate_files.py: tool to estimate the total number of slots (mutable and immutable combined) in the grid, from a small sample 2008-04-24 11:28:35 -07:00
use print function over print statement in ./misc/ directory 2019-03-22 11:40:58 +01:00
munin/tahoe_estimate_files.py: tool to estimate the total number of slots (mutable and immutable combined) in the grid, from a small sample 2008-04-24 11:28:35 -07:00			`import sys, os.path`

			`if len(sys.argv) > 1 and sys.argv[1] == "config":`
use print function over print statement in ./misc/ directory 2019-03-22 11:40:58 +01:00			`print("""\`
munin/tahoe_estimate_files.py: tool to estimate the total number of slots (mutable and immutable combined) in the grid, from a small sample 2008-04-24 11:28:35 -07:00			`graph_title Tahoe File Estimate`
			`graph_vlabel files`
			`graph_category tahoe`
			`graph_info This graph shows the estimated number of files and directories present in the grid`
			`files.label files`
use print function over print statement in ./misc/ directory 2019-03-22 11:40:58 +01:00			`files.draw LINE2""")`
munin/tahoe_estimate_files.py: tool to estimate the total number of slots (mutable and immutable combined) in the grid, from a small sample 2008-04-24 11:28:35 -07:00			`sys.exit(0)`

			`# Edit this to point at some subset of storage directories.`
			`node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"),`
			`os.path.expanduser("~amduser/prodnet/storage2"),`
			`os.path.expanduser("~amduser/prodnet/storage3"),`
			`os.path.expanduser("~amduser/prodnet/storage4"),`
			`]`

			`sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"]`
			`# and edit this to reflect your default encoding's "total_shares" value, and`
			`# the total number of servers.`
			`N = 10`
			`num_servers = 20`

			`index_strings = set()`
			`for base in node_dirs:`
			`for section in sections:`
			`sampledir = os.path.join(base, "storage", "shares", section)`
			`indices = os.listdir(sampledir)`
			`index_strings.update(indices)`
			`unique_strings = len(index_strings)`

			`# the chance that any given file appears on any given server`
			`chance = 1.0 * N / num_servers`

			`# the chance that the file does *not* appear on the servers that we're`
			`# examining`
			`no_chance = (1-chance) ** len(node_dirs)`

			`# if a file has a 25% chance of not appearing in our sample, then we need to`
			`# raise our estimate by (1.25/1)`
			`correction = 1+no_chance`
			`#print "correction", correction`

			`files = unique_strings * (32*32/len(sections)) * correction`
use print function over print statement in ./misc/ directory 2019-03-22 11:40:58 +01:00			`print("files.value %d" % int(files))`