tahoe-lafs/misc/munin/tahoe_server_latency_

#!/usr/bin/env python

# retrieve a latency statistic for a given operation and percentile from a
# set of storage servers.

# the OPERATION value should come from the following list:
#   allocate:   allocate_buckets, first step to upload an immutable file
#    write: write data to an immutable share
#    close: finish writing to an immutable share
#    cancel: abandon a partial immutable share
#   get: get_buckets, first step to download an immutable file
#    read: read data from an immutable share
#   writev: slot_testv_and_readv_and_writev, modify/create a directory
#   readv: read a directory (or mutable file)

# the PERCENTILE value should come from the following list:
#  01_0:   1%
#  10_0:  10%
#  50_0:  50% (median)
#  90_0:  90%
#  99_0:  99%
#  99_9:  99.9%
#  mean:  the mean (average) latency

# To use this, create a symlink from
# /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this
# script. For example:

# ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_latency_ \
#  /etc/munin/plugins/tahoe_server_latency_allocate_99_9

# Also, you will need to put a list of node statistics URLs in the plugin's
# environment, by adding a stanza like the following to a file in
# /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies:
#
# [tahoe_server_latency*]
# env.url_storage1 http://localhost:9011/statistics?t=json
# env.url_storage2 http://localhost:9012/statistics?t=json
# env.url_storage3 http://localhost:9013/statistics?t=json
# env.url_storage4 http://localhost:9014/statistics?t=json

# Of course, these URLs must match the web ports you have configured on the
# storage nodes.

import os, sys
import urllib
import simplejson

# Gather one (nodename, url) pair for every "url_<nodename>" environment
# variable. Sorting the pairs orders the nodes by name.
node_urls = sorted(
    (env_name[len("url_"):], env_value)
    for env_name, env_value in os.environ.items()
    if env_name.startswith("url_")
)

# Work out which statistic to report from the name this script was invoked
# under; it is expected to be run through a symlink named
# tahoe_server_latency_OPERATION_PERCENTILE.
my_name = os.path.basename(sys.argv[0])
PREFIX = "tahoe_server_latency_"
# Validate explicitly rather than with 'assert' (which 'python -O' strips),
# and reject a name with no _PERCENTILE part here instead of letting the
# tuple unpacking below fail with an opaque ValueError.
if not my_name.startswith(PREFIX) or "_" not in my_name[len(PREFIX):]:
    # sys.exit() with a string prints it to stderr and exits with status 1
    sys.exit("%s: plugin must be invoked as %sOPERATION_PERCENTILE"
             % (my_name, PREFIX))
my_name = my_name[len(PREFIX):]
# Split on the first underscore only: percentiles such as "99_9" contain one.
(operation, percentile) = my_name.split("_", 1)
if percentile == "mean":
    what = "mean"
else:
    # e.g. "99_9" becomes "99.9th percentile"
    what = percentile.replace("_", ".") + "th percentile"

# Text printed in answer to munin's "config" request: the graph-wide settings
# first, followed by a label and a draw style for each node's data series.
configinfo = """graph_title Tahoe Server '%(operation)s' Latency (%(what)s)
graph_vlabel seconds
graph_category tahoe
graph_info This graph shows how long '%(operation)s' operations took on the storage server, the %(what)s delay between message receipt and response generation, calculated over the last thousand operations.
""" % dict(operation=operation, what=what)

series_config = []
for nodename, url in node_urls:
    series_config.append("%s.label %s\n" % (nodename, nodename))
    series_config.append("%s.draw LINE2\n" % (nodename,))
configinfo += "".join(series_config)


# When the first command-line argument is "config", print the graph
# description and exit successfully. The one-element slice is simply empty
# when no argument was given, so no separate length check is needed.
if sys.argv[1:2] == ["config"]:
    print(configinfo.rstrip())
    sys.exit(0)

# The statistic's key depends only on the plugin name, not on the node being
# queried, so build it once rather than on every loop iteration.
if percentile == "mean":
    p_key = "mean"
else:
    p_key = percentile + "_percentile"
key = "storage_server.latencies.%s.%s" % (operation, p_key)

# Print one "<nodename>.value <value>" line per configured node.
for nodename, url in node_urls:
    value = None
    try:
        response = urllib.urlopen(url)
        try:
            data = simplejson.loads(response.read())
        finally:
            # release the HTTP connection even if reading or parsing fails
            response.close()
        value = data["stats"][key]
    except (EnvironmentError, ValueError, KeyError):
        # An unreachable node, an unparseable reply, or a missing statistic
        # must not stop the remaining nodes from being reported; note the
        # problem on stderr and fall through to the "unknown" value.
        sys.stderr.write("%s: cannot read %s from %s: %s\n"
                         % (nodename, key, url, sys.exc_info()[1]))
    if value is None:
        # "U" is munin's marker for an unknown value. A null statistic
        # presumably means the node has no samples yet -- TODO confirm.
        value = "U"
    print("%s.value %s" % (nodename, value))