# tahoe-lafs/src/allmydata/node.py
#
# (web-view header residue: 305 lines, 12 KiB, Python;
# "Raw" / "Normal View" / "History" navigation links)

import datetime, os.path, re, types, resource
from base64 import b32decode, b32encode
import twisted
from twisted.python import log
from twisted.application import service
from twisted.internet import defer, reactor
from foolscap import Tub, eventual
from allmydata.util import iputil, observer, humanreadable
# (web-view blame timestamp: 2007-05-24 00:54:48 +00:00)
from allmydata.util.assertutil import precondition
from allmydata.logpublisher import LogPublisher
# Just to get their versions:
import allmydata, foolscap, pycryptopp, zfec
# Matches one "addr" or "addr:portnum" line, where addr is a dotted-quad IPv4
# address.
# group 1 will be addr (dotted quad string), group 3 if any will be portnum (string)
# Each octet is either "0" or a decimal number without a leading zero, so
# addresses such as 10.0.0.1 are accepted. The octet alternations are
# non-capturing to keep the group numbering described above. '$' also matches
# just before a trailing newline, so lines read straight from a file match.
ADDR_RE = re.compile(
    r"^("
    r"(?:0|[1-9][0-9]*)\.(?:0|[1-9][0-9]*)\.(?:0|[1-9][0-9]*)\.(?:0|[1-9][0-9]*)"
    r")(:([1-9][0-9]*))?$"
)
def formatTimeTahoeStyle(self, when):
    """Render the epoch timestamp 'when' as a UTC string with milliseconds.

    The result looks like the leading timestamp of this log line:
      2007-10-12 00:26:28.566Z [Client] rnp752lz: 'client running'

    'self' is accepted but not used, so that this function can be bound as
    a method of another object.
    """
    stamp = datetime.datetime.utcfromtimestamp(when)
    text = stamp.isoformat(" ")
    if not stamp.microsecond:
        # isoformat() leaves out the fractional part entirely when the
        # microsecond field is zero, so add the milliseconds by hand.
        return text + ".000Z"
    # isoformat() emitted six fractional digits; keep only the first three.
    return text[:-3] + "Z"
class Node(service.MultiService):
# this implements common functionality of both Client nodes and Introducer
# nodes.
NODETYPE = "unknown NODETYPE"
PORTNUMFILE = None
CERTFILE = "node.pem"
LOCAL_IP_FILE = "advertised_ip_addresses"
def __init__(self, basedir="."):
service.MultiService.__init__(self)
self.basedir = os.path.abspath(basedir)
self._tub_ready_observerlist = observer.OneShotObserverList()
certfile = os.path.join(self.basedir, self.CERTFILE)
self.tub = Tub(certFile=certfile)
os.chmod(certfile, 0600)
self.tub.setOption("logLocalFailures", True)
self.tub.setOption("logRemoteFailures", True)
self.nodeid = b32decode(self.tub.tubID.upper()) # binary format
self.write_config("my_nodeid", b32encode(self.nodeid).lower() + "\n")
self.short_nodeid = b32encode(self.nodeid).lower()[:8] # ready for printing
assert self.PORTNUMFILE, "Your node.Node subclass must provide PORTNUMFILE"
self._portnumfile = os.path.join(self.basedir, self.PORTNUMFILE)
try:
portnum = int(open(self._portnumfile, "rU").read())
except (EnvironmentError, ValueError):
portnum = 0
self.tub.listenOn("tcp:%d" % portnum)
# we must wait until our service has started before we can find out
# our IP address and thus do tub.setLocation, and we can't register
# any services with the Tub until after that point
self.tub.setServiceParent(self)
self.logSource="Node"
AUTHKEYSFILEBASE = "authorized_keys."
for f in os.listdir(self.basedir):
if f.startswith(AUTHKEYSFILEBASE):
keyfile = os.path.join(self.basedir, f)
portnum = int(f[len(AUTHKEYSFILEBASE):])
from allmydata import manhole
m = manhole.AuthorizedKeysManhole(portnum, keyfile)
m.setServiceParent(self)
self.log("AuthorizedKeysManhole listening on %d" % portnum)
self.setup_logging()
self.log("Node constructed. tahoe version: %s, foolscap: %s,"
" twisted: %s, zfec: %s"
% (allmydata.__version__, foolscap.__version__,
twisted.__version__, zfec.__version__,))
self.increase_rlimits()
def increase_rlimits(self):
# We'd like to raise our soft resource.RLIMIT_NOFILE, since certain
# systems (OS-X, probably solaris) start with a relatively low limit
# (256), and some unit tests want to open up more sockets than this.
# Most linux systems start with both hard and soft limits at 1024,
# which is plenty.
# unfortunately the values to pass to setrlimit() vary widely from
# one system to another. OS-X reports (256, HUGE), but the real hard
# limit is 10240, and accepts (-1,-1) to mean raise it to the
# maximum. Cygwin reports (256, -1), then ignores a request of
# (-1,-1): instead you have to guess at the hard limit (it appears to
# be 3200), so using (3200,-1) seems to work. Linux reports a
# sensible (1024,1024), then rejects (-1,-1) as trying to raise the
# maximum limit, so you could set it to (1024,1024) but you might as
# well leave it alone.
try:
2007-11-16 06:09:02 +00:00
current = resource.getrlimit(resource.RLIMIT_NOFILE)
except AttributeError:
# we're probably missing RLIMIT_NOFILE, maybe this is windows
return
2007-11-16 06:09:02 +00:00
if current[0] >= 1024:
# good enough, leave it alone
return
try:
if current[1] > 0 and current[1] < 1000000:
# solaris reports (256, 65536)
resource.setrlimit(resource.RLIMIT_NOFILE,
(current[1], current[1]))
else:
# this one works on OS-X (bsd), and gives us 10240, but
# it doesn't work on linux (on which both the hard and
# soft limits are set to 1024 by default).
resource.setrlimit(resource.RLIMIT_NOFILE, (-1,-1))
new = resource.getrlimit(resource.RLIMIT_NOFILE)
2007-11-16 06:09:02 +00:00
if new[0] == current[0]:
# probably cygwin, which ignores -1. Use a real value.
resource.setrlimit(resource.RLIMIT_NOFILE, (3200,-1))
except ValueError:
self.log("unable to set RLIMIT_NOFILE: current value %s"
% (resource.getrlimit(resource.RLIMIT_NOFILE),))
except:
# who knows what. It isn't very important, so log it and continue
log.err()
def get_config(self, name, mode="r", required=False):
"""Get the (string) contents of a config file, or None if the file
did not exist. If required=True, raise an exception rather than
returning None. Any leading or trailing whitespace will be stripped
from the data."""
fn = os.path.join(self.basedir, name)
try:
return open(fn, mode).read().strip()
except EnvironmentError:
if not required:
return None
raise
def get_or_create_config(self, name, default_fn, mode="w", filemode=None):
"""Try to get the (string) contents of a config file, and return it.
Any leading or trailing whitespace will be stripped from the data.
If the file does not exist, try to create it using default_fn, and
then return the value that was written. If 'default_fn' is a string,
use it as a default value. If not, treat it as a 0-argument callable
which is expected to return a string.
"""
value = self.get_config(name)
if value is None:
if isinstance(default_fn, (str, unicode)):
value = default_fn
else:
value = default_fn()
fn = os.path.join(self.basedir, name)
try:
f = open(fn, mode)
f.write(value)
f.close()
if filemode is not None:
os.chmod(fn, filemode)
except EnvironmentError, e:
self.log("Unable to write config file '%s'" % fn)
self.log(e)
value = value.strip()
return value
def write_config(self, name, value, mode="w"):
"""Write a string to a config file."""
fn = os.path.join(self.basedir, name)
try:
open(fn, mode).write(value)
except EnvironmentError, e:
self.log("Unable to write config file '%s'" % fn)
self.log(e)
def get_versions(self):
return {'allmydata': allmydata.__version__,
'foolscap': foolscap.__version__,
'twisted': twisted.__version__,
'zfec': zfec.__version__,
'pycryptopp': pycryptopp.__version__,
}
2007-05-24 00:54:48 +00:00
def startService(self):
# Note: this class can be started and stopped at most once.
self.log("Node.startService")
# Delay until the reactor is running.
2007-05-24 00:54:48 +00:00
eventual.eventually(self._startService)
def _startService(self):
precondition(reactor.running)
self.log("Node._startService")
service.MultiService.startService(self)
d = defer.succeed(None)
d.addCallback(lambda res: iputil.get_local_addresses_async())
d.addCallback(self._setup_tub)
d.addCallback(lambda res: self.tub_ready())
def _ready(res):
self.log("%s running" % self.NODETYPE)
self._tub_ready_observerlist.fire(self)
return self
d.addCallback(_ready)
def _die(failure):
self.log('_startService() failed')
log.err(failure)
#reactor.stop() # for unknown reasons, reactor.stop() isn't working. [ ] TODO
self.log('calling os.abort()')
os.abort()
d.addErrback(_die)
def stopService(self):
self.log("Node.stopService")
d = self._tub_ready_observerlist.when_fired()
def _really_stopService(ignored):
self.log("Node._really_stopService")
return service.MultiService.stopService(self)
d.addCallback(_really_stopService)
return d
def shutdown(self):
"""Shut down the node. Returns a Deferred that fires (with None) when
it finally stops kicking."""
self.log("Node.shutdown")
return self.stopService()
def setup_logging(self):
# we replace the formatTime() method of the log observer that twistd
# set up for us, with a method that uses better timestamps.
for o in log.theLogPublisher.observers:
# o might be a FileLogObserver's .emit method
if type(o) is type(self.setup_logging): # bound method
ob = o.im_self
if isinstance(ob, log.FileLogObserver):
newmeth = types.UnboundMethodType(formatTimeTahoeStyle, ob, ob.__class__)
ob.formatTime = newmeth
# TODO: twisted >2.5.0 offers maxRotatedFiles=50
def log(self, msg, src="", args=()):
if src:
logsrc = src
else:
logsrc = self.logSource
if args:
try:
msg = msg % tuple(map(humanreadable.hr, args))
except TypeError, e:
msg = "ERROR: output string '%s' contained invalid %% expansion, error: %s, args: %s\n" % (`msg`, e, `args`)
log.callWithContext({"system":logsrc},
log.msg,
(self.short_nodeid + ": " + humanreadable.hr(msg)))
def _setup_tub(self, local_addresses):
# we can't get a dynamically-assigned portnum until our Tub is
# running, which means after startService.
l = self.tub.getListeners()[0]
portnum = l.getPortnum()
# record which port we're listening on, so we can grab the same one next time
open(self._portnumfile, "w").write("%d\n" % portnum)
local_addresses = [ "%s:%d" % (addr, portnum,) for addr in local_addresses ]
addresses = []
try:
for addrline in open(os.path.join(self.basedir, self.LOCAL_IP_FILE), "rU"):
mo = ADDR_RE.search(addrline)
if mo:
(addr, dummy, aportnum,) = mo.groups()
if aportnum is None:
aportnum = portnum
addresses.append("%s:%d" % (addr, int(aportnum),))
except EnvironmentError:
pass
addresses.extend(local_addresses)
location = ",".join(addresses)
self.log("Tub location set to %s" % location)
self.tub.setLocation(location)
return self.tub
def tub_ready(self):
# called when the Tub is available for registerReference
self.add_service(LogPublisher())
log_gatherer_furl = self.get_config("log_gatherer.furl")
if log_gatherer_furl:
self.tub.connectTo(log_gatherer_furl, self._log_gatherer_connected)
def _log_gatherer_connected(self, rref):
rref.callRemote("logport",
self.nodeid, self.getServiceNamed("log_publisher"))
def when_tub_ready(self):
return self._tub_ready_observerlist.when_fired()
def add_service(self, s):
s.setServiceParent(self)
return s