From 4e010da54b3d660376e4d583a2ca3e8befd60899 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Thu, 26 Sep 2013 17:45:19 -0400 Subject: [PATCH] Work in progress... --- node/Address.hpp | 9 -- node/Constants.hpp | 10 +-- node/Multicaster.cpp | 17 +--- node/Multicaster.hpp | 61 +++++++++----- node/Network.cpp | 2 - node/Network.hpp | 27 ------ node/Packet.cpp | 4 +- node/Packet.hpp | 171 +++++++++++++++++++------------------ node/PacketDecoder.cpp | 185 +++++++++++++++++++++-------------------- node/Topology.cpp | 2 +- 10 files changed, 231 insertions(+), 257 deletions(-) diff --git a/node/Address.hpp b/node/Address.hpp index 51dd84ec5..034bc1449 100644 --- a/node/Address.hpp +++ b/node/Address.hpp @@ -231,15 +231,6 @@ public: */ inline operator bool() const throw() { return (_a != 0); } - /** - * @return Sum of all bytes in address - */ - inline unsigned int sum() const - throw() - { - return (unsigned int)(((_a >> 32) & 0xff) + ((_a >> 24) & 0xff) + ((_a >> 16) & 0xff) + ((_a >> 8) & 0xff) + (_a & 0xff)); - } - /** * Check if this address is reserved * diff --git a/node/Constants.hpp b/node/Constants.hpp index a46937ed5..f7000bfe0 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -230,6 +230,11 @@ error_no_ZT_ARCH_defined; */ #define ZT_RELAY_MAX_HOPS 3 +/** + * Size of multicast deduplication ring buffer in 64-bit ints + */ +#define ZT_MULTICAST_DEDUP_HISTORY_LENGTH 512 + /** * Period between announcements of all multicast 'likes' in ms * @@ -244,11 +249,6 @@ error_no_ZT_ARCH_defined; */ #define ZT_MULTICAST_LIKE_EXPIRE ((ZT_MULTICAST_LIKE_ANNOUNCE_ALL_PERIOD * 2) + 1000) -/** - * Expiration for remembered MULTICAST_GOTs, in ms - */ -#define ZT_MULTICAST_MAGNET_STATE_EXPIRE 30000 - /** * Time between polls of local taps for multicast membership changes */ diff --git a/node/Multicaster.cpp b/node/Multicaster.cpp index e545289e2..36fd98fd9 100644 --- a/node/Multicaster.cpp +++ b/node/Multicaster.cpp @@ -76,15 +76,6 @@ void Multicaster::bringCloser(uint64_t nwid,const Address &a) } } -void Multicaster::got(uint64_t nwid,const Address &peer,uint64_t mcGuid) -{ - Mutex::Lock _l(_lock); - _NetInfo &n = _nets[nwid]; - std::pair< uint64_t,std::set
> &g = n.got[mcGuid]; - g.first = Utils::now(); - g.second.insert(peer); -} - void Multicaster::clean() { Mutex::Lock _l(_lock); @@ -92,14 +83,8 @@ void Multicaster::clean() uint64_t now = Utils::now(); for(std::map< uint64_t,_NetInfo >::iterator n(_nets.begin());n!=_nets.end();) { - for(std::map< uint64_t,std::pair< uint64_t,std::set
> >::iterator g(n->second.got.begin());g!=n->second.got.end();) { - if ((now - g->second.first) > ZT_MULTICAST_MAGNET_STATE_EXPIRE) - n->second.got.erase(g++); - else ++g; - } - for(std::map< _Subscription,_SubInfo >::iterator s(n->second.subscriptions.begin());s!=n->second.subscriptions.end();) { - if ((now - s->second.lastLike) > ZT_MULTICAST_LIKE_EXPIRE) { + if ((now - s->second.lastLike) >= ZT_MULTICAST_LIKE_EXPIRE) { std::map< MulticastGroup,std::list< Address > >::iterator p(n->second.proximity.find(s->first.second)); p->second.erase(s->second.proximitySlot); if (p->second.empty()) diff --git a/node/Multicaster.hpp b/node/Multicaster.hpp index fb9bfe2d8..2979bc3b4 100644 --- a/node/Multicaster.hpp +++ b/node/Multicaster.hpp @@ -41,6 +41,7 @@ #include "Mutex.hpp" #include "MulticastGroup.hpp" #include "Address.hpp" +#include "Buffer.hpp" namespace ZeroTier { @@ -72,22 +73,34 @@ public: */ void bringCloser(uint64_t nwid,const Address &a); - /** - * Indicate that a peer reported that it GOT a multicast - * - * This only happens on magnet nodes for a propagation. - * - * @param nwid Network ID - * @param mcGuid Multicast GUID - * @param peer Peer that GOT multicast - */ - void got(uint64_t nwid,const Address &peer,uint64_t mcGuid); - /** * Erase entries for expired LIKEs and GOT records */ void clean(); + /** + * Multicast deduplicator + * + * This checks to see if a multicast GUID has been seen before. If not, it + * adds it to the history and returns false. + * + * @param nwid Network ID + * @param mcGuid Multicast GUID (sender address + sender unique ID) + * @return True if multicast IS a duplicate, false otherwise + */ + inline bool deduplicate(uint64_t nwid,uint64_t mcGuid) + throw() + { + Mutex::Lock _l(_lock); + _NetInfo &n = _nets[nwid]; + for(unsigned int i=0;i - inline unsigned int getNextHops(uint64_t nwid,const MulticastGroup &mg,uint64_t mcGuid,F nextHopFunc) + inline unsigned int getNextHops(uint64_t nwid,const MulticastGroup &mg,F nextHopFunc) { Mutex::Lock _l(_lock); @@ -111,16 +123,11 @@ public: std::map< MulticastGroup,std::list< Address > >::iterator p(n->second.proximity.find(mg)); if (p == n->second.proximity.end()) return 0; - std::pair< uint64_t,std::set< Address > > &g = n->second.got[mcGuid]; - g.first = Utils::now(); unsigned int cnt = 0; for(std::list< Address >::iterator a(p->second.begin());a!=p->second.end();++a) { - if (g.second.insert(*a).second) { - ++cnt; - if (!nextHopFunc(*a)) - break; - } + if (!nextHopFunc(*a)) + break; } return cnt; } @@ -141,13 +148,21 @@ private: }; // An address and multicast group tuple - typedef std::pair _Subscription; + typedef std::pair< Address,MulticastGroup > _Subscription; // Multicast info for a given network struct _NetInfo { - // GOTs by multicast GUID: time of last GOT, addresses that GOT - std::map< uint64_t,std::pair< uint64_t,std::set< Address > > > got; + _NetInfo() + throw() + { + memset(multicastHistory,0,sizeof(multicastHistory)); + multicastHistoryPtr = 0; + } + + // Ring buffer of most recently injected multicast packet GUIDs + uint64_t multicastHistory[ZT_MULTICAST_DEDUP_HISTORY_LENGTH]; + unsigned int multicastHistoryPtr; // Peer proximity ordering for peers subscribed to each group std::map< MulticastGroup,std::list< Address > > proximity; diff --git a/node/Network.cpp b/node/Network.cpp index bfc4b0130..65e61738d 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -157,8 +157,6 @@ SharedPtr Network::newInstance(const RuntimeEnvironment *renv,uint64_t // that then causes the Network instance to be deleted before it is finished // being constructed. C++ edge cases, how I love thee. SharedPtr nw(new Network()); - memset(nw->_multicastHistory,0,sizeof(nw->_multicastHistory)); - nw->_multicastHistoryPtr = 0; nw->_ready = false; // disable handling of Ethernet frames during construct nw->_r = renv; nw->_tap = new EthernetTap(renv,tag,renv->identity.address().toMAC(),ZT_IF_MTU,&_CBhandleTapData,nw.ptr()); diff --git a/node/Network.hpp b/node/Network.hpp index 993ee6a67..2ed705049 100644 --- a/node/Network.hpp +++ b/node/Network.hpp @@ -52,8 +52,6 @@ #include "InetAddress.hpp" #include "BandwidthAccount.hpp" -#define ZT_NETWORK_MULTICAST_DEDUP_HISTORY_LENGTH 256 - namespace ZeroTier { class RuntimeEnvironment; @@ -585,27 +583,6 @@ public: //return tmp; } - /** - * Multicast deduplicator - * - * This checks to see if a multicast GUID has been seen before. If not, it - * adds it to the history and returns false. - * - * @param mcGuid Multicast GUID (sender address + sender unique ID) - * @return True if multicast IS a duplicate, false otherwise - */ - inline bool multicastDeduplicate(uint64_t mcGuid) - throw() - { - Mutex::Lock _l(_lock); - for(unsigned int i=0;i,BandwidthAccount > _multicastRateAccounts; diff --git a/node/Packet.cpp b/node/Packet.cpp index f97317525..46c55c99b 100644 --- a/node/Packet.cpp +++ b/node/Packet.cpp @@ -42,9 +42,9 @@ const char *Packet::verbString(Verb v) case VERB_WHOIS: return "WHOIS"; case VERB_RENDEZVOUS: return "RENDEZVOUS"; case VERB_FRAME: return "FRAME"; - case VERB_MULTICAST_LIKE: return "MULTICAST_LIKE"; - case VERB_MULTICAST_GOT: return "MULTICAST_GOT"; + case VERB_PROXY_FRAME: return "PROXY_FRAME"; case VERB_MULTICAST_FRAME: return "MULTICAST_FRAME"; + case VERB_MULTICAST_LIKE: return "MULTICAST_LIKE"; case VERB_NETWORK_MEMBERSHIP_CERTIFICATE: return "NETWORK_MEMBERSHIP_CERTIFICATE"; case VERB_NETWORK_CONFIG_REQUEST: return "NETWORK_CONFIG_REQUEST"; case VERB_NETWORK_CONFIG_REFRESH: return "NETWORK_CONFIG_REFRESH"; diff --git a/node/Packet.hpp b/node/Packet.hpp index 0aa6b9495..9a90427f3 100644 --- a/node/Packet.hpp +++ b/node/Packet.hpp @@ -98,14 +98,9 @@ #define ZT_PACKET_IDX_PAYLOAD 28 /** - * ZeroTier packet buffer size - * - * This can be changed. This provides enough room for MTU-size packet - * payloads plus some overhead. The subtraction of sizeof(unsigned int) - * makes it an even multiple of 1024 (see Buffer), which might reduce - * memory use a little. + * Packet buffer size (can be changed) */ -#define ZT_PROTO_MAX_PACKET_LENGTH (3072 - sizeof(unsigned int)) +#define ZT_PROTO_MAX_PACKET_LENGTH (ZT_MAX_PACKET_FRAGMENTS * ZT_UDP_DEFAULT_PAYLOAD_MTU) /** * Minimum viable packet length (also length of header) @@ -164,22 +159,37 @@ #define ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE (ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID + 8) #define ZT_PROTO_VERB_FRAME_IDX_PAYLOAD (ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE + 2) -#define ZT_PROTO_VERB_MULTICAST_GOT_IDX_NETWORK_ID (ZT_PACKET_IDX_PAYLOAD) -#define ZT_PROTO_VERB_MULTICAST_GOT_IDX_MULTICAST_GUID (ZT_PROTO_VERB_MULTICAST_GOT_IDX_NETWORK_ID + 8) - -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FORWARD_COUNT (ZT_PACKET_IDX_PAYLOAD) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_QUEUE (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FORWARD_COUNT + 4) -#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_QUEUE 320 -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_MAGNET (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_QUEUE + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_QUEUE) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SUBMITTER (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_MAGNET + 5) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SUBMITTER_UNIQUE_ID (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SUBMITTER + 5) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SUBMITTER_UNIQUE_ID + 3) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SOURCE_MAC (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID + 8) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DESTINATION_MAC (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SOURCE_MAC + 6) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DESTINATION_ADI (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DESTINATION_MAC + 6) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ETHERTYPE (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DESTINATION_ADI + 4) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD_LENGTH (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ETHERTYPE + 2) -#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD_LENGTH + 2) +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_DEPTH (ZT_PACKET_IDX_PAYLOAD) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_DEPTH 2 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_FIFO (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_DEPTH + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_DEPTH) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO 320 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_BLOOM (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_FIFO + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM 1024 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_BLOOM + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_NETWORK_ID 8 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_BLOOM_NONCE (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_NETWORK_ID) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM_NONCE 2 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_PREFIX_BITS (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_BLOOM_NONCE + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM_NONCE) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_PREFIX_BITS 1 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_PREFIX (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_PREFIX_BITS + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_PREFIX_BITS) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_PREFIX 2 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ORIGIN (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_PREFIX + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_PREFIX) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ORIGIN 5 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ORIGIN_MCID (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ORIGIN + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ORIGIN) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ORIGIN_MCID 3 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_GUID (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ORIGIN) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_GUID 8 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SOURCE_MAC (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ORIGIN_MCID + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ORIGIN_MCID) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_SOURCE_MAC 6 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DEST_MAC (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SOURCE_MAC + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_SOURCE_MAC) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_DEST_MAC 6 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DEST_ADI (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DEST_MAC + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_DEST_MAC) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_DEST_ADI 4 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ETHERTYPE (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DEST_ADI + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_DEST_ADI) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ETHERTYPE 2 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME_LEN (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ETHERTYPE + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ETHERTYPE) +#define ZT_PROTO_VERB_MULTICAST_FRAME_LEN_FRAME_LEN 2 +#define ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD_LEN + ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PAYLOAD_LEN) #define ZT_PROTO_VERB_NETWORK_CONFIG_REQUEST_IDX_NETWORK_ID (ZT_PACKET_IDX_PAYLOAD) #define ZT_PROTO_VERB_NETWORK_CONFIG_REQUEST_IDX_DICT_LEN (ZT_PROTO_VERB_NETWORK_CONFIG_REQUEST_IDX_NETWORK_ID + 8) @@ -459,35 +469,23 @@ public: */ VERB_FRAME = 6, - /* Announce interest in multicast group(s): - * <[8] 64-bit network ID> - * <[6] multicast Ethernet address> - * <[4] multicast additional distinguishing information (ADI)> - * [... additional tuples of network/address/adi ...] - * - * OK/ERROR are not generated. - */ - VERB_MULTICAST_LIKE = 7, - - /* Announce receipt of a multicast to propagation magnet node: - * <[8] 64-bit network ID> - * <[8] 64-bit multicast GUID> - * - * OK/ERROR are not generated. - */ - VERB_MULTICAST_GOT = 8, + /* TODO: not implemented yet */ + VERB_PROXY_FRAME = 7, /* A multicast frame: - * <[4] 32-bit forwarding counter> - * <[320] FIFO queue of up to 64 ZT addresses, zero address terminated> - * [... start of signed portion, signed by original submitter below ...] - * <[5] ZeroTier address of propagation magnet node> - * <[5] ZeroTier address of original submitter/signer> - * <[3] 24-bit multicast ID, combined with signer address to form GUID> + * <[2] 16-bit propagation depth> + * <[320] propagation FIFO> + * <[1024] propagation bloom filter> + * [... begin signed portion ...] * <[8] 64-bit network ID> + * <[2] 16-bit random propagation bloom filter nonce> + * <[1] number of significant bits in propagation restrict prefix> + * <[2] 16-bit propagation restriction prefix (left to right)> + * <[5] ZeroTier address of node of origin> + * <[3] 24-bit multicast ID, together with origin forms GUID> * <[6] source MAC address> * <[6] destination multicast group MAC address> - * <[4] destination multicast group 32-bit ADI field> + * <[4] destination multicast group ADI field> * <[2] 16-bit frame ethertype> * <[2] 16-bit length of payload> * <[...] ethernet frame payload> @@ -495,46 +493,57 @@ public: * <[2] 16-bit length of signature> * <[...] signature (currently Ed25519/SHA-512, 96 bytes in length)> * - * Multicast frames are propagated using a graph exploration algorithm in - * which the FIFO queue is embedded in the multicast packet. + * When a multicast frame is received: * - * Upon receipt: - * (1) packet is possibly injected into the local TAP - * (2) send a MULTICAST_GOT message to magnet node with 64-bit - * multicast GUID - * (3) forwarding counter is incremented, STOP of max exceeded - * (4) topmost value is removed from FIFO and saved (next hop) - * (5) deduplicate FIFO (helps prevent floods) - * (6) FIFO is filled with as many known peers that have LIKED this - * multicast group as possible, excluding peers to whom this - * multicast has already been sent or (if magnet node) have GOT - * this multicast - * (7) packet is sent to next hop (if possible) + * (1) Check the signature of the signed portion of packet, discard on fail + * (2) Check for duplicate multicast, STOP if duplicate + * (3) Check rate limits, STOP if over limit + * (4) Inject into tap if member of network and packet passes other checks + * (5) Increment propagation depth, STOP if over limit + * (6) Pop topmost element off FIFO -- this is next hop + * (7) Push suggested next hops onto FIFO until full -- set corresponding + * bits in bloom filter + * (8) Send to next hop, or to a supernode if none * - * If there was no next hop -- empty FIFO -- and no new hops are known, - * the packet is sent to the magnet node. The magnet node must be aware - * of all members of a given multicast group. It is the node responsible - * for bridging sparse multicast groups. When other nodes receive the - * multicast, they send GOT to the magnet node so that it will not - * send it back to them. + * When choosing next hops, exclude addresses corresponding to bits already + * set in the bloom filter and addresses outside the propagation restrict + * prefix. * - * Right now the magnet is a supernode. In the future there may be - * dedicated magnets and/or magnets elected via some kind of DHT or - * something to act as such for given multicast groups. This latter - * might happen if we evolve more toward a totally decentralized model - * instead of today's partially decentralized model. + * Algorithm for setting bits in bloom filter: * - * The multicast GUID is formed by packing the original sender / signer - * address into the most significant 5 bytes of a 64-bit big-endian - * number, and then packing the 24-bit sender unique ID into the least - * significant 3 bytes. This can be used to locally deduplicate, and - * to identify the multicast in a GOT sent to the magnet. The 24-bit - * ID must be unique for a given sender over recent (say, 10min) time - * spans and across networks. Random or sequential values are fine. + * (1) Place the address in the least significant 40 bits of a 64-bit int. + * (2) Add the bloom filter nonce to this value. + * (3) XOR the least significant 13 bits of this value with the next most + * significant 13 bits and so on, 4 times. + * (4) This value ANDed with 0x1fff is the bit to set in the bloom filter. + * (5) Set this bit via: byte[bit >> 3] |= (0x80 >> (bit & 7)) * - * OK/ERROR are not generated, but GOT is sent to magnet. + * To check bits in bloom filter perform the same computation but mask the + * bit instead of ORing it. + * + * Propagation occurs within a restrict prefix. The restrict prefix is + * applied to the least significant 16 bits of an address. The original + * sender of the multicast sets the restrict prefix and sends 2^N copies + * of the multicast frame, one for each address prefix. This permits + * propagation to be partitioned into realms, and places the majority of + * the burden for this upon the sender. + * + * OK/ERROR are not generated. */ - VERB_MULTICAST_FRAME = 9, + VERB_MULTICAST_FRAME = 8, + + /* Announce interest in multicast group(s): + * <[8] 64-bit network ID> + * <[6] multicast Ethernet address> + * <[4] multicast additional distinguishing information (ADI)> + * [... additional tuples of network/address/adi ...] + * + * LIKEs are sent to peers with whom you have a direct peer to peer + * connection, and always including supernodes. + * + * OK/ERROR are not generated. + */ + VERB_MULTICAST_LIKE = 9, /* Network member certificate for sending peer: * <[8] 64-bit network ID> diff --git a/node/PacketDecoder.cpp b/node/PacketDecoder.cpp index 0a360a1f1..dc2fe2263 100644 --- a/node/PacketDecoder.cpp +++ b/node/PacketDecoder.cpp @@ -461,133 +461,136 @@ bool PacketDecoder::_doMULTICAST_GOT(const RuntimeEnvironment *_r,const SharedPt return true; } -// Function object used in _doMULTICAST_FRAME -struct _doMULTICAST_FRAME_fillQueueWithNextHops +// Function used in _doMULTICAST_FRAME +static inline unsigned int _bloomBit(const Address &a,uint16_t bloomNonce) + throw() { - _doMULTICAST_FRAME_fillQueueWithNextHops(char *nq,unsigned int want) - ptr(nq), - need(want) {} + uint64_t a = a.toInt() + (uint64_t)bloomNonce; + unsigned int bit = (unsigned int)(a & 0x1fff); + bit ^= (unsigned int)((a >> 13) & 0x1fff); + bit ^= (unsigned int)((a >> 26) & 0x1fff); + bit ^= (unsigned int)((a >> 39) & 0x1fff); + return bit; +} + +// Function object used in _doMULTICAST_FRAME +struct _PushNextHops +{ + _PushNextHops(unsigned char **ptr_,unsigned char *end_,unsigned char *bloom_,uint16_t bloomNonce_const Address &origin_) + ptr(ptr_), + end(end_), + bloom(bloom_), + origin(origin_), + bloomNonce(bloomNonce_) throw() {} inline bool operator()(const Address &a) const throw() { - a.copyTo(ptr,ZT_ADDRESS_LENGTH); - ptr += ZT_ADDRESS_LENGTH; - return (--need != 0); + if (a == origin) + return true; + + unsigned int bb = _bloomBit(a,bloomNonce); + unsigned char *bbyte = bloom + (bb >> 3); + unsigned char bmask = 0x80 >> (bb & 7); + if ((*bbyte & bmask)) + return true; + else *bbyte |= bmask; + + a.copyTo(*ptr,ZT_ADDRESS_LENGTH); + *ptr += ZT_ADDRESS_LENGTH; + + return (*ptr != end); } - char *ptr; - unsigned int need; + unsigned char **ptr; + unsigned char *end; + unsigned char *bloom; + Address origin; + uint16_t bloomNonce; }; bool PacketDecoder::_doMULTICAST_FRAME(const RuntimeEnvironment *_r,const SharedPtr &peer) { try { - unsigned int forwardCount = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FORWARD_COUNT); - char *queue = (char *)field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_QUEUE,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_QUEUE); - Address magnet(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_MAGNET,ZT_ADDRESS_LENGTH),ZT_ADDRESS_LENGTH); - Address submitterAddr(Address(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SUBMITTER,ZT_ADDRESS_LENGTH),ZT_ADDRESS_LENGTH)); - SharedPtr submitter(_r->topology->getPeer(submitterAddr)); - if (!submitter) { - _r->sw->requestWhois(submitterAddr); + Address origin(Address(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ORIGIN,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_ORIGIN),ZT_ADDRESS_LENGTH)); + SharedPtr originPeer(_r->topology->getPeer(origin)); + if (!originPeer) { + _r->sw->requestWhois(origin); _step = DECODE_WAITING_FOR_MULTICAST_FRAME_ORIGINAL_SENDER_LOOKUP; // causes processing to come back here return false; } - uint64_t guid = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SUBMITTER); // 40-bit sender address + 24-bit sender unique ID - uint64_t nwid = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID); - MAC sourceMac(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SOURCE_MAC,6)); - MulticastGroup dest(MAC(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DESTINATION_MAC,6)),at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DESTINATION_ADI)); - unsigned int etherType = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ETHERTYPE); - unsigned int frameLen = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD_LENGTH); - unsigned char *frame = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD,frameLen); - unsigned int signatureLen = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD + frameLen); - unsigned char *signature = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD + frameLen + 2,signatureLen); - unsigned int signedPartLen = (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PAYLOAD - ZT_PROTO_VERB_MULTICAST_FRAME_IDX_MAGNET) + frameLen; - if (!submitter->identity().verify(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_MAGNET,signedPartLen),signedPartLen,signature,signatureLen)) { - TRACE("dropped MULTICAST_FRAME from %s(%s): failed signature verification, claims to be from %s",source().toString().c_str(),_remoteAddress.toString().c_str(),submitterAddr.toString().c_str()); + uint16_t depth = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_DEPTH); + unsigned char *fifo = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_FIFO,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_FIFO); + unsigned char *bloom = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_BLOOM,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_PROPAGATION_BLOOM); + uint64_t nwid = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID); + uint16_t bloomNonce = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_BLOOM_NONCE); + unsigned int prefixBits = (*this)[ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_PREFIX_BITS]; + uint16_t prefix = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_PREFIX); + uint64_t guid = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_GUID); + MAC sourceMac(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_SOURCE_MAC,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_SOURCE_MAC)); + MulticastGroup dest(MAC(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DEST_MAC,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_DEST_MAC)),at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_DEST_ADI)); + unsigned int etherType = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_ETHERTYPE); + unsigned int frameLen = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME_LEN); + unsigned char *frame = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME,frameLen); + unsigned int signatureLen = at(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen); + unsigned char *signature = field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME + frameLen + 2,signatureLen); + + unsigned int signedPartLen = (ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FRAME - ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID) + frameLen; + if (!submitter->identity().verify(field(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_NETWORK_ID,signedPartLen),signedPartLen,signature,signatureLen)) { + TRACE("dropped MULTICAST_FRAME from %s(%s): failed signature verification, claims to be from %s",source().toString().c_str(),_remoteAddress.toString().c_str(),origin.toString().c_str()); return true; } + if (_r->mc->deduplicate(nwid,guid)) { + TRACE("dropped MULTICAST_FRAME from %s(%s): duplicate",source().toString().c_str(),_remoteAddress.toString().c_str()); + return true; + } + + bool rateLimitsExceeded = false; + SharedPtr network(_r->nc->network(nwid)); if (network) { if (!network->isAllowed(submitterAddr)) { } else if (!dest.mac().isMulticast()) { } else if ((!network->permitsBridging())&&(!submitterAddr.wouldHaveMac(sourceMac))) { } else if (!network->permitsEtherType(etherType)) { - } else if (network->multicastDeduplicate(guid)) { } else if (network->updateAndCheckMulticastBalance(submitterAddr,dest,frameLen)) { network->tap().put(sourceMac,dest.mac(),etherType,frame,frameLen); - } + } else rateLimitsExceeded = true; } - if (magnet != _r->identity.address()) { - Packet outp(magnet,_r->identity.address(),Packet::VERB_MULTICAST_GOT); - outp.append(nwid); - outp.append(guid); - _r->sw->send(outp,true); + if ((rateLimitsExceeded)&&(!_r->topology->amSupernode())) { + TRACE("dropped MULTICAST_FRAME from %s(%s): rate limit exceeded for sender %s",source().toString().c_str(),_remoteAddress.toString().c_str(),origin.toString().c_str()); + return true; } - setAt(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_FORWARD_COUNT,(uint32_t)++forwardCount); + ++depth; // TODO: implement max depth + setAt(ZT_PROTO_VERB_MULTICAST_FRAME_IDX_PROPAGATION_DEPTH,(uint16_t)depth); - char newQueue[ZT_PROTO_VERB_MULTICAST_FRAME_LEN_QUEUE + ZT_ADDRESS_LENGTH]; // room for an extra if we need a nextHop - unsigned int newQueueLen = 0; - - // Top of FIFO is next hop (if there is one) - Address nextHop(queue,ZT_ADDRESS_LENGTH); - - // Deduplicate the rest of the queue[], adding them to newQueue - if (nextHop) { // there was a next hop, so there was something there - char firstByteSeen[256]; - for(unsigned int j=0;j<(256 / 8);++j) - ((uint64_t *)firstByteSeen)[j] = 0; - for(unsigned int i=ZT_ADDRESS_LENGTH;imc->getNextHops(nwid,dest,guid,_doMULTICAST_FRAME_fillQueueWithNextHops(newQueue,needQueueItems)); + // Fill remaining part of new fifo + _r->mc->getNextHops(nwid,dest,_PushNextHops(&newFifoPtr,newFifoEnd,bloom,bloomNonce,origin)); - // Copy new queue over old queue, and pick off next hop if we need one - if (newQueueLen) { - char *nq = newQueue; - if (!nextHop) { - nextHop.setTo(nq,ZT_ADDRESS_LENGTH); - nq += ZT_ADDRESS_LENGTH; - --newQueueLen; - } - unsigned int i = 0; - unsigned int k = ZT_ADDRESS_LENGTH * newQueueLen; - while (i < k) - nq[i] = newQueue[i]; - while (i < ZT_PROTO_VERB_MULTICAST_FRAME_LEN_QUEUE) - nq[i] = 0; - } else memset(queue,0,ZT_PROTO_VERB_MULTICAST_FRAME_LEN_QUEUE); + // Zero-terminate new FIFO if not completely full + while (newFifoPtr != newFifoEnd) + *(newFifoPtr++) = (unsigned char)0; - // If there's still no next hop, it's the magnet - if (!nextHop) - nextHop = magnet; + // First element in newFifo[] is next hop + Address nextHop(newFifo,ZT_ADDRESS_LENGTH); // Send to next hop, unless it's us of course if (nextHop != _r->identity.address()) { diff --git a/node/Topology.cpp b/node/Topology.cpp index f830cbb69..0185147e3 100644 --- a/node/Topology.cpp +++ b/node/Topology.cpp @@ -33,7 +33,7 @@ namespace ZeroTier { -#define ZT_KISSDB_HASH_TABLE_SIZE 131072 +#define ZT_KISSDB_HASH_TABLE_SIZE 32768 #define ZT_KISSDB_KEY_SIZE ZT_ADDRESS_LENGTH #define ZT_KISSDB_VALUE_SIZE ZT_PEER_MAX_SERIALIZED_LENGTH