Merge pull request #1220 from tahoe-lafs/3902-listen-storage-http

Listen with storage HTTP protocol on same port as Foolscap

Fixes ticket:3902
This commit is contained in:
Itamar Turner-Trauring 2022-10-04 10:32:13 -04:00 committed by GitHub
commit f56b43468b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 346 additions and 80 deletions

View File

@ -47,27 +47,27 @@ This can be considered to expand to "**N**\ ew URLs" or "Authe\ **N**\ ticating
The anticipated use for a **NURL** will still be to establish a TLS connection to a peer. The anticipated use for a **NURL** will still be to establish a TLS connection to a peer.
The protocol run over that TLS connection could be Foolscap though it is more likely to be an HTTP-based protocol (such as GBS). The protocol run over that TLS connection could be Foolscap though it is more likely to be an HTTP-based protocol (such as GBS).
Unlike fURLs, only a single net-loc is included, for consistency with other forms of URLs.
As a result, multiple NURLs may be available for a single server.
Syntax Syntax
------ ------
The EBNF for a NURL is as follows:: The EBNF for a NURL is as follows::
nurl = scheme, hash, "@", net-loc-list, "/", swiss-number, [ version1 ] nurl = tcp-nurl | tor-nurl | i2p-nurl
tcp-nurl = "pb://", hash, "@", tcp-loc, "/", swiss-number, [ version1 ]
scheme = "pb://" tor-nurl = "pb+tor://", hash, "@", tcp-loc, "/", swiss-number, [ version1 ]
i2p-nurl = "pb+i2p://", hash, "@", i2p-loc, "/", swiss-number, [ version1 ]
hash = unreserved hash = unreserved
net-loc-list = net-loc, [ { ",", net-loc } ] tcp-loc = hostname, [ ":" port ]
net-loc = tcp-loc | tor-loc | i2p-loc
tcp-loc = [ "tcp:" ], hostname, [ ":" port ]
tor-loc = "tor:", hostname, [ ":" port ]
i2p-loc = "i2p:", i2p-addr, [ ":" port ]
i2p-addr = { unreserved }, ".i2p"
hostname = domain | IPv4address | IPv6address hostname = domain | IPv4address | IPv6address
i2p-loc = i2p-addr, [ ":" port ]
i2p-addr = { unreserved }, ".i2p"
swiss-number = segment swiss-number = segment
version1 = "#v=1" version1 = "#v=1"
@ -87,11 +87,13 @@ These differences are separated into distinct versions.
Version 0 Version 0
--------- ---------
A Foolscap fURL is considered the canonical definition of a version 0 NURL. In theory, a Foolscap fURL with a single netloc is considered the canonical definition of a version 0 NURL.
Notably, Notably,
the hash component is defined as the base32-encoded SHA1 hash of the DER form of an x509v3 certificate. the hash component is defined as the base32-encoded SHA1 hash of the DER form of an x509v3 certificate.
A version 0 NURL is identified by the absence of the ``v=1`` fragment. A version 0 NURL is identified by the absence of the ``v=1`` fragment.
In practice, real world fURLs may have more than one netloc, so lack of version fragment will likely just involve dispatching the fURL to a different parser.
Examples Examples
~~~~~~~~ ~~~~~~~~
@ -119,7 +121,7 @@ The hash component of a version 1 NURL differs in three ways from the prior vers
*all* certificate fields should be considered within the context of the relationship identified by the SPKI hash. *all* certificate fields should be considered within the context of the relationship identified by the SPKI hash.
3. The hash is encoded using urlsafe-base64 (without padding) instead of base32. 3. The hash is encoded using urlsafe-base64 (without padding) instead of base32.
This provides a more compact representation and minimizes the usability impacts of switching from a 160 bit hash to a 224 bit hash. This provides a more compact representation and minimizes the usability impacts of switching from a 160 bit hash to a 256 bit hash.
A version 1 NURL is identified by the presence of the ``v=1`` fragment. A version 1 NURL is identified by the presence of the ``v=1`` fragment.
Though the length of the hash string (38 bytes) could also be used to differentiate it from a version 0 NURL, Though the length of the hash string (38 bytes) could also be used to differentiate it from a version 0 NURL,

View File

@ -0,0 +1 @@
The new HTTPS-based storage server is now enabled transparently on the same port as the Foolscap server. This will not have any user-facing impact until the HTTPS storage protocol is supported in clients as well.

View File

@ -1,17 +1,9 @@
""" """
Ported to Python 3. Ported to Python 3.
""" """
from __future__ import absolute_import from __future__ import annotations
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401
# Don't use future str to prevent leaking future's newbytes into foolscap, which they break.
from past.builtins import unicode as str
from typing import Optional
import os, stat, time, weakref import os, stat, time, weakref
from base64 import urlsafe_b64encode from base64 import urlsafe_b64encode
from functools import partial from functools import partial
@ -591,6 +583,10 @@ def anonymous_storage_enabled(config):
@implementer(IStatsProducer) @implementer(IStatsProducer)
class _Client(node.Node, pollmixin.PollMixin): class _Client(node.Node, pollmixin.PollMixin):
"""
This class should be refactored; see
https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931
"""
STOREDIR = 'storage' STOREDIR = 'storage'
NODETYPE = "client" NODETYPE = "client"
@ -658,6 +654,14 @@ class _Client(node.Node, pollmixin.PollMixin):
if webport: if webport:
self.init_web(webport) # strports string self.init_web(webport) # strports string
# TODO this may be the wrong location for now? But as a temporary measure
# it allows us to get NURLs for testing in test_istorageserver.py. This
# will eventually get fixed one way or another in
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901. See also
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 for the bigger
# picture issue.
self.storage_nurls : Optional[set] = None
def init_stats_provider(self): def init_stats_provider(self):
self.stats_provider = StatsProvider(self) self.stats_provider = StatsProvider(self)
self.stats_provider.setServiceParent(self) self.stats_provider.setServiceParent(self)
@ -818,6 +822,10 @@ class _Client(node.Node, pollmixin.PollMixin):
if anonymous_storage_enabled(self.config): if anonymous_storage_enabled(self.config):
furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding())
furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file)
(_, _, swissnum) = decode_furl(furl)
self.storage_nurls = self.tub.negotiationClass.add_storage_server(
ss, swissnum.encode("ascii")
)
announcement["anonymous-storage-FURL"] = furl announcement["anonymous-storage-FURL"] = furl
enabled_storage_servers = self._enable_storage_servers( enabled_storage_servers = self._enable_storage_servers(

View File

@ -55,6 +55,8 @@ from allmydata.util.yamlutil import (
from . import ( from . import (
__full_version__, __full_version__,
) )
from .protocol_switch import create_tub_with_https_support
def _common_valid_config(): def _common_valid_config():
return configutil.ValidConfiguration({ return configutil.ValidConfiguration({
@ -706,7 +708,10 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han
:param dict tub_options: every key-value pair in here will be set in :param dict tub_options: every key-value pair in here will be set in
the new Tub via `Tub.setOption` the new Tub via `Tub.setOption`
""" """
tub = Tub(**kwargs) # We listen simultaneously for both Foolscap and HTTPS on the same port,
# so we have to create a special Foolscap Tub for that to work:
tub = create_tub_with_https_support(**kwargs)
for (name, value) in list(tub_options.items()): for (name, value) in list(tub_options.items()):
tub.setOption(name, value) tub.setOption(name, value)
handlers = default_connection_handlers.copy() handlers = default_connection_handlers.copy()

View File

@ -0,0 +1,210 @@
"""
Support for listening with both HTTPS and Foolscap on the same port.
The goal is to make the transition from Foolscap to HTTPS-based protocols as
simple as possible, with no extra configuration needed. Listening on the same
port means a user upgrading Tahoe-LAFS will automatically get HTTPS working
with no additional changes.
``create_tub_with_https_support()`` creates a new ``Tub`` that has its
``negotiationClass`` modified to be a new subclass tied to that specific
``Tub`` instance. Calling ``tub.negotiationClass.add_storage_server(...)``
then adds relevant information for a storage server once it becomes available
later in the configuration process.
"""
from __future__ import annotations
from itertools import chain
from twisted.internet.protocol import Protocol
from twisted.internet.interfaces import IDelayedCall
from twisted.internet.ssl import CertificateOptions
from twisted.web.server import Site
from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.internet import reactor
from hyperlink import DecodedURL
from foolscap.negotiate import Negotiation
from foolscap.api import Tub
from .storage.http_server import HTTPServer, build_nurl
from .storage.server import StorageServer
class _PretendToBeNegotiation(type):
    """
    Metaclass that lets ``_FoolscapOrHttps`` pass for a ``Negotiation``.

    Foolscap's internals run checks such as
    ``assert isinstance(protocol, tub.negotiationClass)``, and the
    ``protocol`` in question is sometimes a ``_FoolscapOrHttps`` instance
    and sometimes a plain ``Negotiation`` instance. A class built with this
    metaclass therefore accepts both kinds of object in ``isinstance()``.
    """

    def __instancecheck__(self, instance):
        # Instances of the class itself, or of any subclass, always match.
        if issubclass(instance.__class__, self):
            return True
        # Anything else matches only if it is a ``Negotiation`` instance.
        return isinstance(instance, Negotiation)
class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation):
"""
Based on initial query, decide whether we're talking Foolscap or HTTP.
Additionally, pretends to be a ``foolscap.negotiate.Negotiation`` instance,
since these are created by Foolscap's ``Tub``, by setting this to be the
tub's ``negotiationClass``.
Do not instantiate directly, use ``create_tub_with_https_support(...)``
instead. The way this class works is that a new subclass is created for a
specific ``Tub`` instance.
"""
# These are class attributes; they will be set by
# create_tub_with_https_support() and add_storage_server().
# The Twisted HTTPS protocol factory wrapping the storage server HTTP API:
https_factory: TLSMemoryBIOFactory
# The tub that created us:
tub: Tub
@classmethod
def add_storage_server(
cls, storage_server: StorageServer, swissnum: bytes
) -> set[DecodedURL]:
"""
Update a ``_FoolscapOrHttps`` subclass for a specific ``Tub`` instance
with the class attributes it requires for a specific storage server.
Returns the resulting NURLs.
"""
# We need to be a subclass:
assert cls != _FoolscapOrHttps
# The tub instance must already be set:
assert hasattr(cls, "tub")
assert isinstance(cls.tub, Tub)
# Tub.myCertificate is a twisted.internet.ssl.PrivateCertificate
# instance.
certificate_options = CertificateOptions(
privateKey=cls.tub.myCertificate.privateKey.original,
certificate=cls.tub.myCertificate.original,
)
http_storage_server = HTTPServer(storage_server, swissnum)
# The HTTP storage resource is wrapped in a Site and then in a TLS factory
# that reuses the tub's own certificate and private key.
# NOTE(review): the positional ``False`` is presumably
# TLSMemoryBIOFactory's "is client" flag -- confirm against Twisted docs.
cls.https_factory = TLSMemoryBIOFactory(
certificate_options,
False,
Site(http_storage_server.get_resource()),
)
storage_nurls = set()
# Individual hints can be in the form
# "tcp:host:port,tcp:host:port,tcp:host:port".
for location_hint in chain.from_iterable(
hints.split(",") for hints in cls.tub.locationHints
):
# Only "tcp:" hints produce a NURL; any other hint is skipped.
if location_hint.startswith("tcp:"):
# NOTE(review): this three-way unpack raises ValueError unless the
# hint contains exactly two colons -- confirm hints always have the
# "tcp:host:port" shape.
_, hostname, port = location_hint.split(":")
port = int(port)
storage_nurls.add(
build_nurl(
hostname,
port,
str(swissnum, "ascii"),
cls.tub.myCertificate.original.to_cryptography(),
)
)
# TODO this is probably where we'll have to support Tor and I2P?
# See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3888#comment:9
# for discussion (there will be separate tickets added for those at
# some point.)
return storage_nurls
# Every instance wraps a real Negotiation built from the same arguments;
# until the protocol is decided, most attribute access is proxied to it
# (see __setattr__ / __getattr__).
def __init__(self, *args, **kwargs):
self._foolscap: Negotiation = Negotiation(*args, **kwargs)
# Only the names listed here are stored on this object itself; every other
# attribute write is forwarded to the wrapped Negotiation.
def __setattr__(self, name, value):
if name in {"_foolscap", "_buffer", "transport", "__class__", "_timeout"}:
object.__setattr__(self, name, value)
else:
setattr(self._foolscap, name, value)
# __getattr__ only runs when normal lookup fails, so attributes stored on
# this object win and everything else is read from the wrapped Negotiation.
def __getattr__(self, name):
return getattr(self._foolscap, name)
def _convert_to_negotiation(self):
"""
Convert self to a ``Negotiation`` instance.
"""
# The class is swapped first: while this object is still a
# _FoolscapOrHttps, the custom __setattr__ above would forward a
# ``__dict__`` assignment to the wrapped object instead of applying it here.
self.__class__ = Negotiation # type: ignore
self.__dict__ = self._foolscap.__dict__
def initClient(self, *args, **kwargs):
# After creation, a Negotiation instance either has initClient() or
# initServer() called. Since this is a client, we're never going to do
# HTTP, so we can immediately become a Negotiation instance.
assert not hasattr(self, "_buffer")
self._convert_to_negotiation()
# self.__class__ is now Negotiation, so this call resolves to
# Negotiation's initClient rather than re-entering this method.
return self.initClient(*args, **kwargs)
def connectionMade(self):
self._buffer: bytes = b""
# Schedule an abort of the connection after 30 seconds; the call is
# cancelled in dataReceived() once enough bytes have arrived, or in
# connectionLost().
self._timeout: IDelayedCall = reactor.callLater(
30, self.transport.abortConnection
)
def connectionLost(self, reason):
if self._timeout.active():
self._timeout.cancel()
def dataReceived(self, data: bytes) -> None:
"""Handle incoming data.
Once we've decided which protocol we are, update self.__class__, at
which point all methods will be called on the new class.
"""
self._buffer += data
# Keep buffering until at least 8 bytes are available, which is the
# length of the b"GET /id/" prefix tested below.
if len(self._buffer) < 8:
return
# Check if it looks like a Foolscap request. If so, it can handle this
# and later data, otherwise assume HTTPS.
self._timeout.cancel()
if self._buffer.startswith(b"GET /id/"):
# We're a Foolscap Negotiation server protocol instance:
# transport and the buffered bytes are copied into locals first because
# the conversion replaces this object's __dict__.
transport = self.transport
buf = self._buffer
self._convert_to_negotiation()
# From here on self is a Negotiation; connect it and replay the bytes
# that were buffered while the protocol was still undecided.
self.makeConnection(transport)
self.dataReceived(buf)
return
else:
# We're a HTTPS protocol instance, serving the storage protocol:
assert self.transport is not None
protocol = self.https_factory.buildProtocol(self.transport.getPeer())
protocol.makeConnection(self.transport)
protocol.dataReceived(self._buffer)
# Update the factory so it knows we're transforming to a new
# protocol object (we'll do that next)
# NOTE(review): presumably buildProtocol() registered ``protocol`` in
# https_factory.protocols, which is why it can be popped and re-keyed
# under ``self`` -- confirm against Twisted's WrappingFactory.
value = self.https_factory.protocols.pop(protocol)
self.https_factory.protocols[self] = value
# Transform self into the TLS protocol 🪄
self.__class__ = protocol.__class__
self.__dict__ = protocol.__dict__
def create_tub_with_https_support(**kwargs) -> Tub:
    """
    Build a ``Tub`` whose listening port also serves HTTPS.

    All keyword arguments are passed straight through to ``Tub``. A
    protocol-switch subclass of ``_FoolscapOrHttps`` dedicated to the new
    ``Tub`` is created and installed as its ``negotiationClass``.
    """
    new_tub = Tub(**kwargs)

    # Each Tub gets its own subclass so that class-level state (``tub`` here,
    # ``https_factory`` later via ``add_storage_server()``) is not shared
    # between Tub instances.
    class FoolscapOrHttpForTub(_FoolscapOrHttps):
        tub = new_tub

    new_tub.negotiationClass = FoolscapOrHttpForTub  # type: ignore
    return new_tub

View File

@ -4,12 +4,13 @@ HTTP server for storage.
from __future__ import annotations from __future__ import annotations
from typing import Dict, List, Set, Tuple, Any, Callable, Union from typing import Dict, List, Set, Tuple, Any, Callable, Union, cast
from functools import wraps from functools import wraps
from base64 import b64decode from base64 import b64decode
import binascii import binascii
from tempfile import TemporaryFile from tempfile import TemporaryFile
from cryptography.x509 import Certificate as CryptoCertificate
from zope.interface import implementer from zope.interface import implementer
from klein import Klein from klein import Klein
from twisted.web import http from twisted.web import http
@ -18,6 +19,7 @@ from twisted.internet.interfaces import (
IStreamServerEndpoint, IStreamServerEndpoint,
IPullProducer, IPullProducer,
) )
from twisted.internet.address import IPv4Address, IPv6Address
from twisted.internet.defer import Deferred from twisted.internet.defer import Deferred
from twisted.internet.ssl import CertificateOptions, Certificate, PrivateCertificate from twisted.internet.ssl import CertificateOptions, Certificate, PrivateCertificate
from twisted.web.server import Site, Request from twisted.web.server import Site, Request
@ -193,7 +195,12 @@ class UploadsInProgress(object):
def remove_write_bucket(self, bucket: BucketWriter): def remove_write_bucket(self, bucket: BucketWriter):
"""Stop tracking the given ``BucketWriter``.""" """Stop tracking the given ``BucketWriter``."""
storage_index, share_number = self._bucketwriters.pop(bucket) try:
storage_index, share_number = self._bucketwriters.pop(bucket)
except KeyError:
# This is probably a BucketWriter created by Foolscap, so just
# ignore it.
return
uploads_index = self._uploads[storage_index] uploads_index = self._uploads[storage_index]
uploads_index.shares.pop(share_number) uploads_index.shares.pop(share_number)
uploads_index.upload_secrets.pop(share_number) uploads_index.upload_secrets.pop(share_number)
@ -862,6 +869,29 @@ class _TLSEndpointWrapper(object):
) )
def build_nurl(
    hostname: str, port: int, swissnum: str, certificate: CryptoCertificate
) -> DecodedURL:
    """
    Build the HTTPS NURL that clients use to connect to a storage server.

    The NURL combines the server's hostname and port, its swissnum (as the
    single path segment) and the SPKI hash of its x509 certificate (as the
    userinfo component).
    """
    spki_hash = str(get_spki_hash(certificate), "ascii")
    return DecodedURL().replace(
        scheme="pb",
        userinfo=(spki_hash,),
        host=hostname,
        port=port,
        path=(swissnum,),
        # The version fragment is how we know this NURL is HTTP-based
        # (i.e. not Foolscap).
        fragment="v=1",
    )
def listen_tls( def listen_tls(
server: HTTPServer, server: HTTPServer,
hostname: str, hostname: str,
@ -881,22 +911,15 @@ def listen_tls(
""" """
endpoint = _TLSEndpointWrapper.from_paths(endpoint, private_key_path, cert_path) endpoint = _TLSEndpointWrapper.from_paths(endpoint, private_key_path, cert_path)
def build_nurl(listening_port: IListeningPort) -> DecodedURL: def get_nurl(listening_port: IListeningPort) -> DecodedURL:
nurl = DecodedURL().replace( address = cast(Union[IPv4Address, IPv6Address], listening_port.getHost())
fragment="v=1", # how we know this NURL is HTTP-based (i.e. not Foolscap) return build_nurl(
host=hostname, hostname,
port=listening_port.getHost().port, address.port,
path=(str(server._swissnum, "ascii"),), str(server._swissnum, "ascii"),
userinfo=( load_pem_x509_certificate(cert_path.getContent()),
str(
get_spki_hash(load_pem_x509_certificate(cert_path.getContent())),
"ascii",
),
),
scheme="pb",
) )
return nurl
return endpoint.listen(Site(server.get_resource())).addCallback( return endpoint.listen(Site(server.get_resource())).addCallback(
lambda listening_port: (build_nurl(listening_port), listening_port) lambda listening_port: (get_nurl(listening_port), listening_port)
) )

View File

@ -18,21 +18,14 @@ from unittest import SkipTest
from twisted.internet.defer import inlineCallbacks, returnValue, succeed from twisted.internet.defer import inlineCallbacks, returnValue, succeed
from twisted.internet.task import Clock from twisted.internet.task import Clock
from twisted.internet import reactor from twisted.internet import reactor
from twisted.internet.endpoints import serverFromString
from twisted.python.filepath import FilePath
from foolscap.api import Referenceable, RemoteException from foolscap.api import Referenceable, RemoteException
from allmydata.interfaces import IStorageServer # really, IStorageClient # A better name for this would be IStorageClient...
from allmydata.interfaces import IStorageServer
from .common_system import SystemTestMixin from .common_system import SystemTestMixin
from .common import AsyncTestCase, SameProcessStreamEndpointAssigner from .common import AsyncTestCase
from .certs import (
generate_certificate,
generate_private_key,
private_key_to_file,
cert_to_file,
)
from allmydata.storage.server import StorageServer # not a IStorageServer!! from allmydata.storage.server import StorageServer # not a IStorageServer!!
from allmydata.storage.http_server import HTTPServer, listen_tls
from allmydata.storage.http_client import StorageClient from allmydata.storage.http_client import StorageClient
from allmydata.storage_client import _HTTPStorageServer from allmydata.storage_client import _HTTPStorageServer
@ -1084,40 +1077,17 @@ class _FoolscapMixin(_SharedMixin):
class _HTTPMixin(_SharedMixin): class _HTTPMixin(_SharedMixin):
"""Run tests on the HTTP version of ``IStorageServer``.""" """Run tests on the HTTP version of ``IStorageServer``."""
def setUp(self):
self._port_assigner = SameProcessStreamEndpointAssigner()
self._port_assigner.setUp()
self.addCleanup(self._port_assigner.tearDown)
return _SharedMixin.setUp(self)
@inlineCallbacks
def _get_istorage_server(self): def _get_istorage_server(self):
swissnum = b"1234" nurl = list(self.clients[0].storage_nurls)[0]
http_storage_server = HTTPServer(self.server, swissnum)
# Listen on randomly assigned port, using self-signed cert:
private_key = generate_private_key()
certificate = generate_certificate(private_key)
_, endpoint_string = self._port_assigner.assign(reactor)
nurl, listening_port = yield listen_tls(
http_storage_server,
"127.0.0.1",
serverFromString(reactor, endpoint_string),
private_key_to_file(FilePath(self.mktemp()), private_key),
cert_to_file(FilePath(self.mktemp()), certificate),
)
self.addCleanup(listening_port.stopListening)
# Create HTTP client with non-persistent connections, so we don't leak # Create HTTP client with non-persistent connections, so we don't leak
# state across tests: # state across tests:
returnValue( client: IStorageServer = _HTTPStorageServer.from_http_client(
_HTTPStorageServer.from_http_client( StorageClient.from_nurl(nurl, reactor, persistent=False)
StorageClient.from_nurl(nurl, reactor, persistent=False)
)
) )
self.assertTrue(IStorageServer.providedBy(client))
# Eventually should also: return succeed(client)
# self.assertTrue(IStorageServer.providedBy(client))
class FoolscapSharedAPIsTests( class FoolscapSharedAPIsTests(

View File

@ -0,0 +1,43 @@
"""
Unit tests for ``allmydata.protocol_switch``.
By its nature, most of the testing needs to be end-to-end; essentially any test
that uses real Foolscap (``test_system.py``, integration tests) ensures
Foolscap still works. ``test_istorageserver.py`` tests the HTTP support.
"""
from foolscap.negotiate import Negotiation
from .common import TestCase
from ..protocol_switch import _PretendToBeNegotiation
class UtilityTests(TestCase):
    """Tests for the helper utilities of the protocol switch module."""

    def test_metaclass(self):
        """
        With ``_PretendToBeNegotiation`` as its metaclass, a class keeps the
        usual ``isinstance()`` behaviour for its own instances (and those of
        its subclasses) while also claiming ``Negotiation`` instances as its
        own. Unrelated objects are still rejected.
        """

        class Parent(metaclass=_PretendToBeNegotiation):
            pass

        class Child(Parent):
            pass

        class Other:
            pass

        # Checks against the base class.
        parent_instance = Parent()
        self.assertIsInstance(parent_instance, Parent)
        self.assertIsInstance(Negotiation(), Parent)
        self.assertNotIsInstance(Other(), Parent)

        # Checks against the subclass, which inherits the metaclass.
        child_instance = Child()
        self.assertIsInstance(child_instance, Child)
        self.assertIsInstance(child_instance, Parent)
        self.assertIsInstance(Negotiation(), Child)
        self.assertNotIsInstance(Other(), Child)

View File

@ -198,6 +198,10 @@ class PinningHTTPSValidation(AsyncTestCase):
response = await self.request(url, certificate) response = await self.request(url, certificate)
self.assertEqual(await response.content(), b"YOYODYNE") self.assertEqual(await response.content(), b"YOYODYNE")
# We keep getting TLSMemoryBIOProtocol being left around, so try harder
# to wait for it to finish.
await deferLater(reactor, 0.001)
# A potential attack to test is a private key that doesn't match the # A potential attack to test is a private key that doesn't match the
# certificate... but OpenSSL (quite rightly) won't let you listen with that # certificate... but OpenSSL (quite rightly) won't let you listen with that
# so I don't know how to test that! See # so I don't know how to test that! See